// Anime4K_Restore_CNN_VL
// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_VL.glsl

//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_3
//!USE MulAdd
//!CAPABILITY FP16

#include "../StubDefs.hlsli"


//!TEXTURE
Texture2D INPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D OUTPUT;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex1;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex2;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex3;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex4;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex5;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex6;

//!SAMPLER
//!FILTER POINT
SamplerState sam;


//!PASS 1
//!DESC Conv-4x3x3x3
//!IN INPUT
//!OUT tex1, tex2
//!BLOCK_SIZE 16
//!NUM_THREADS 64

void Pass1(uint2 blockStart, uint3 threadId) {
	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	uint i, j;

	MF3 src[4][4];
	[unroll]
	for (i = 0; i <= 2; i += 2) {
		[unroll]
		for (j = 0; j <= 2; j += 2) {
			float2 tpos = (gxy + uint2(i, j)) * inputPt;
			const MF4 sr = INPUT.GatherRed(sam, tpos);
			const MF4 sg = INPUT.GatherGreen(sam, tpos);
			const MF4 sb = INPUT.GatherBlue(sam, tpos);

			// w z
			// x y
			src[i][j] = MF3(sr.w, sg.w, sb.w);
			src[i][j + 1] = MF3(sr.x, sg.x, sb.x);
			src[i + 1][j] = MF3(sr.z, sg.z, sb.z);
			src[i + 1][j + 1] = MF3(sr.y, sg.y, sb.y);
		}
	}

	[unroll]
	for (i = 1; i <= 2; ++i) {
		[unroll]
		for (j = 1; j <= 2; ++j) {
			uint2 destPos = gxy + uint2(i - 1, j - 1);

			if (i != 1 || j != 1) {
				if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) {
					continue;
				}
			}

			MF4 target1 = { -0.046043985, 0.055581126, -0.08791638, -0.13022089 };
			target1 = MulAdd(src[i - 1][j - 1], MF3x4(0.1690102, -0.2560719, 0.39658326, -0.3679659, -0.27616683, -0.35619372, -0.3748396, 0.08430813, -0.29574734, -0.31511316, -0.09773105, 0.13616018), target1);
			target1 = MulAdd(src[i - 1][j], MF3x4(-0.1326393, -0.259433, 0.025070239, 0.58914864, -0.036478516, 0.30723435, 0.007458902, 0.012962684, 0.2493056, 0.13007334, -0.08448256, -0.38414413), target1);
			target1 = MulAdd(src[i - 1][j + 1], MF3x4(-0.11539356, 0.35253766, 0.26143202, 0.2760807, -0.09371543, -0.028165473, -0.028452158, -0.27050856, 0.06718067, -0.0056619495, -0.17654495, 0.17288211), target1);
			target1 = MulAdd(src[i][j - 1], MF3x4(-0.16145481, -0.3204927, -0.54317135, 0.11830119, 0.49315026, 0.12008072, 0.50857407, -0.30382085, 0.25807253, 0.020755528, 0.29388228, 0.106109895), target1);
			target1 = MulAdd(src[i][j], MF3x4(-0.22728722, 0.50484747, -0.07904469, 0.33114597, 0.50306976, -0.22760947, 0.14773269, 0.17628263, 0.14788547, -0.08223464, -0.10880935, -0.3151985), target1);
			target1 = MulAdd(src[i][j + 1], MF3x4(0.3414351, 0.057279214, -0.14419858, 0.09761111, -0.11794496, 0.021717256, -0.22750235, 0.13986664, -0.38932344, 0.28996095, 0.3773904, 0.13175532), target1);
			target1 = MulAdd(src[i + 1][j - 1], MF3x4(0.1376552, -0.19587159, -0.35147396, -0.097646296, 0.1686707, -0.14385861, 0.031198, 0.12383533, -0.23089902, 0.08707301, 0.3362293, -0.100579016), target1);
			target1 = MulAdd(src[i + 1][j], MF3x4(-0.056774966, 0.047585852, -0.36395878, -0.20211312, 0.4077735, 0.12631284, 0.39813092, -0.033365678, 0.2307249, -0.09131807, 0.20823865, 0.31084216), target1);
			target1 = MulAdd(src[i + 1][j + 1], MF3x4(-0.12456089, 0.09755632, 0.31490886, -0.06579996, -0.13386595, 0.07564795, -0.26605195, -0.075180635, -0.11182657, 0.06757017, -0.14351276, -0.16828312), target1);

			MF4 target2 = { -0.0022791293, -0.024132347, -0.57621074, 0.028573977 };
			target2 = MulAdd(src[i - 1][j - 1], MF3x4(-0.15485518, -0.29363206, -0.22610365, -0.14291525, -0.45240572, -0.18319772, -0.12209436, 0.15031648, 0.09878383, 0.06711082, 0.25763842, -0.084633484), target2);
			target2 = MulAdd(src[i - 1][j], MF3x4(-0.10204406, 0.16167697, 0.22371867, -0.37947702, -0.24476196, -0.038824454, 0.060157117, 0.15764871, -0.08072927, -0.2210841, -0.31835055, 0.009979876), target2);
			target2 = MulAdd(src[i - 1][j + 1], MF3x4(0.20506924, 0.21132155, -0.0922578, -0.07430473, 0.14529926, 0.20549752, 0.0077948375, 0.13246094, -0.32353187, 0.21074104, 0.092629515, 0.17590871), target2);
			target2 = MulAdd(src[i][j - 1], MF3x4(0.04125819, -0.44050243, 0.23729716, 0.3218237, 0.12943116, -0.011674174, 0.10390632, 0.027775545, -0.20308031, -0.16904089, -0.2121676, -0.022515794), target2);
			target2 = MulAdd(src[i][j], MF3x4(0.09664124, 0.20127031, 0.60345304, 0.16697013, 0.23093723, -0.38116834, 0.109695725, 0.0007595324, 0.4092646, 0.009624758, 0.11229678, 0.25326383), target2);
			target2 = MulAdd(src[i][j + 1], MF3x4(0.014879592, 0.19204311, 0.07102085, -0.7312604, 0.34860876, 0.3429918, -0.027331594, 0.27636307, 0.1342437, 0.107820466, -0.12645108, 0.21081445), target2);
			target2 = MulAdd(src[i + 1][j - 1], MF3x4(-0.12687613, -0.09247973, -0.25973785, 0.4350873, -0.18987224, 0.028678741, -0.0903819, -0.63974863, 0.205591, 0.11308998, 0.18458389, -0.4149041), target2);
			target2 = MulAdd(src[i + 1][j], MF3x4(0.34691808, -0.025498383, 0.3428986, 0.21663484, 0.23404741, -0.1725327, -0.0036315925, -0.13299675, -0.1873967, 0.031331502, -0.08785591, -0.0013278709), target2);
			target2 = MulAdd(src[i + 1][j + 1], MF3x4(-0.35846514, 0.048703704, -0.104165934, 0.16529736, -0.15378916, 0.26030356, -0.07134151, 0.03692383, -0.15807101, -0.18885155, 0.044707954, -0.11444462), target2);

			tex1[destPos] = target1;
			tex2[destPos] = target2;
		}
	}
}


//!PASS 2
//!DESC Conv-4x3x3x16
//!IN tex1, tex2
//!OUT tex3, tex4
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass2(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y),0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0),0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y),0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y),0);
	MF4 e1 = tex1.SampleLevel(sam, pos,0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y),0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y),0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0),0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y),0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { 0.018166734, -0.11002478, -0.05554318, -0.0988193 };
	target1 = MulAdd(a1, MF4x4(0.010346764, 0.07230188, -0.24734616, -0.09937907, 0.02228549, -0.19550583, -0.019540425, -0.1037373, 0.033996485, -0.075554, -0.20228972, 0.07090153, -0.09194035, -0.058972966, 0.1768268, 0.27517542), target1);
	target1 = MulAdd(b1, MF4x4(0.020078976, 0.12433655, -0.1620775, 0.036401592, 0.079748705, 0.11660013, 0.17917652, -0.017513236, -0.18936846, 0.24478136, -0.45726213, -0.045004416, -0.08295188, 0.067733586, -0.080548316, 0.2744211), target1);
	target1 = MulAdd(c1, MF4x4(0.024916803, 0.27562472, 0.043771956, -0.012240604, 0.0786355, 0.042651594, 0.16049327, -0.14577515, -0.032735053, 0.17658092, 0.16382934, -0.02337374, 0.11551492, 0.056343183, -0.17930213, 0.14259394), target1);
	target1 = MulAdd(d1, MF4x4(0.20010485, 0.06747722, -0.19026905, 0.11013709, 0.13062745, -0.044626113, -0.0062261797, 0.2189639, 0.1403497, -0.022713251, -0.19452858, -0.010305412, -0.06407589, 0.09836748, 0.025805516, 0.23430973), target1);
	target1 = MulAdd(e1, MF4x4(-0.14664203, 0.034910418, 0.024714258, -0.066872925, -0.15717538, -0.14179383, -0.14091893, 0.05859166, 0.18919097, -0.18544437, -0.09068573, -0.08615929, -0.051434122, 0.2170678, 0.18409058, -0.17461225), target1);
	target1 = MulAdd(f1, MF4x4(-0.11354446, 0.10745854, 0.2682663, 0.05949201, -0.10695986, 0.1407851, -0.03551388, 0.10691649, -0.17148238, -0.38287184, 0.2074456, 0.11828914, 0.048535194, 0.1464864, -0.18169662, -0.14074169), target1);
	target1 = MulAdd(g1, MF4x4(0.22160622, -0.1513045, -0.053284165, 0.033202525, 0.15574448, -0.043640967, -0.0093824165, -0.0019965349, -0.097964935, -0.08289824, 0.08239996, 0.07868361, 0.05731752, -0.20441617, -0.013016076, -0.253108), target1);
	target1 = MulAdd(h1, MF4x4(-0.031249097, -0.2272863, 0.23573665, 0.03357689, 0.011395065, -0.10885564, -0.06287508, -0.031719524, 0.10331069, 0.17560169, 0.18303394, 0.022961004, -0.17011635, -0.24371737, 0.10678694, -0.3222825), target1);
	target1 = MulAdd(i1, MF4x4(-0.1275465, -0.08844758, 0.10994917, -0.00910273, 0.09393154, 0.03894992, 0.14367905, -0.11811715, -0.09077633, -0.015776094, 0.27427456, -0.13283503, 0.18724327, -0.08139094, 0.04933602, -0.051852766), target1);
	target1 = MulAdd(a2, MF4x4(-0.06764611, -0.27426586, 0.12045272, 0.09410856, -0.14258035, 0.11802992, -0.09093882, 0.0022018093, 0.4590643, 0.046258576, -0.07827223, 0.448011, -0.103631735, -0.016930219, -0.15421398, 0.11045997), target1);
	target1 = MulAdd(b2, MF4x4(-0.17295076, 0.00151352, 0.14938255, 0.08336512, -0.07496541, -0.07561223, -0.0846474, 0.14979269, -0.09142163, 0.23925088, -0.015199518, -0.37749895, -0.20636298, -0.022585187, -0.20371509, 0.0745308), target1);
	target1 = MulAdd(c2, MF4x4(0.06458832, -0.009722021, -0.123604394, 0.06548835, -0.3039139, -0.022024399, 0.05297587, -0.0626883, 0.23556642, 0.1516464, -0.07004877, -0.1845364, -0.05918428, 0.19158973, -0.14983447, 0.030489758), target1);
	target1 = MulAdd(d2, MF4x4(0.36604697, 0.17516142, -0.10853731, -0.22694224, -0.107650936, 0.23013335, 0.094055794, -0.17047717, -0.3006048, -0.08621717, -0.18815655, -0.03570218, 0.09676118, -0.017718751, 0.059138596, 0.073388465), target1);
	target1 = MulAdd(e2, MF4x4(-0.12791575, 0.101956226, 0.13091874, -0.046373338, 0.04955811, -0.04030444, 0.13869923, -0.046699073, -0.42611042, -0.7173929, 0.052184317, 0.6178025, -0.02929954, -0.07638965, -0.15000828, 0.030710017), target1);
	target1 = MulAdd(f2, MF4x4(0.057806686, 0.20842272, -0.20148766, 0.006666912, 0.13356528, -0.45265228, -0.07354092, 0.21447696, 0.019552143, -0.13645506, 0.14643854, -0.0071413796, -0.15487236, -0.002250615, 0.30622452, 0.0033902125), target1);
	target1 = MulAdd(g2, MF4x4(0.06896002, 0.24397352, -0.06479052, 0.20676947, -0.24259068, 0.055320013, -0.09032122, -0.11222854, -0.08982342, -0.114818625, -0.06399291, -0.3024516, -0.06302166, -0.1925528, 0.03458982, 0.028828239), target1);
	target1 = MulAdd(h2, MF4x4(0.09764086, 0.09599894, -0.0073313303, 0.14418933, -0.045712367, 0.12657364, 0.04620374, -0.069778584, 0.30047333, -0.012418192, 0.15516461, -0.18087754, 0.08178273, 0.14262857, -0.01741533, -0.12509112), target1);
	target1 = MulAdd(i2, MF4x4(0.04697884, -0.1506804, 0.031823065, 0.13397239, -0.18396698, 0.10681781, -0.29586303, -0.0039136545, 0.17560847, -0.12486726, -0.018646788, -0.20688744, -0.030614454, -0.0527634, 0.23593572, -0.10542146), target1);
	target1 = MulAdd(na1, MF4x4(-0.19182229, -0.32615846, 0.26283535, -0.1371942, -0.071202695, 0.12056063, -0.11450658, -0.27711076, -0.42096004, 0.0014352369, 0.1559669, -0.14464542, -0.17973948, 0.079166576, -0.12501791, -0.20623216), target1);
	target1 = MulAdd(nb1, MF4x4(0.12469872, 0.32190827, -0.059510354, 0.1393449, -0.12845798, -0.019571869, -0.22630808, -0.14031963, 0.36072046, 0.05858427, 0.19278921, 0.121090546, -0.067538865, -0.018770566, 0.14318037, -0.15561756), target1);
	target1 = MulAdd(nc1, MF4x4(0.024663208, 0.21110268, -0.016415706, 0.060093414, -0.03739678, -0.107412934, -0.077527136, 0.30331334, 0.17196326, -0.15512557, -0.09499732, -0.15748607, -0.16680105, -0.015185634, 0.16114107, -0.21288376), target1);
	target1 = MulAdd(nd1, MF4x4(-0.17739037, -0.1190967, 0.13191372, -0.2527187, -0.14992718, -0.30511454, 0.19145966, 0.002194003, -0.12888977, 0.19152176, 0.27528167, 0.099714965, 0.12865707, -0.12051514, -0.055013947, 0.26231763), target1);
	target1 = MulAdd(ne1, MF4x4(0.46433613, -0.11708138, -0.20157282, 0.32022122, 0.079468675, 0.029407484, 0.2559102, -0.15651533, 0.08644574, -0.09747344, -0.07528584, 0.17354868, 0.19167562, -0.17698488, -0.09896657, 0.17093097), target1);
	target1 = MulAdd(nf1, MF4x4(-0.20283653, -0.33680332, 0.2282385, 0.18832158, 0.20866042, 0.00076752366, 0.16471444, -0.21548858, 0.16193539, 0.17141372, 0.03140222, 0.03913644, -0.030161971, 0.00014570929, 0.08993654, -0.064823024), target1);
	target1 = MulAdd(ng1, MF4x4(-0.3075755, 0.19942546, 0.015526995, -0.120868504, -0.254515, -0.07791228, 0.03271691, 0.11794217, 0.11258601, 0.045204375, -0.061196107, -0.115958795, 0.3861869, 0.048215542, 0.07016682, -0.009975758), target1);
	target1 = MulAdd(nh1, MF4x4(-0.07623697, 0.16094944, -0.02283455, 0.14112763, -0.051149167, 0.20429814, 0.011314802, 0.18914083, -0.24240434, -0.08784008, -0.16763984, -0.08492233, 0.31062725, -0.11925119, -0.33195966, 0.2060798), target1);
	target1 = MulAdd(ni1, MF4x4(-0.016709225, -0.14472668, -0.3677625, -0.09832719, 0.030297454, -0.05775362, -0.1401375, 0.08119674, -0.01795042, 0.05183797, -0.24320887, 0.066842034, -0.22245285, -0.02740993, 0.06316751, 0.053399116), target1);
	target1 = MulAdd(na2, MF4x4(-0.039214406, -0.08876633, 0.045552462, 0.19226661, 0.1355001, -0.13942362, 0.17398876, 0.2914014, -0.191809, 0.037143208, 0.013333581, -0.16632195, 0.113767646, -0.106692605, 0.1589787, 0.030107044), target1);
	target1 = MulAdd(nb2, MF4x4(0.21997562, 0.13855208, -0.05783191, -0.033682413, -0.010961168, 0.10524961, 0.02177416, 0.18289444, 0.043692037, 0.07853899, -0.039936125, -0.1004449, 0.04494073, -0.020680292, 0.17578089, -0.106598996), target1);
	target1 = MulAdd(nc2, MF4x4(0.026852835, -0.16037546, 0.11278316, 0.12656097, -0.006857894, -0.03400118, -0.051564034, 0.00085412664, -0.37556714, -0.05279987, 0.029383834, -0.14246808, -0.056380164, -0.002399925, 0.16025752, 0.036324855), target1);
	target1 = MulAdd(nd2, MF4x4(0.022709966, 0.046350412, 0.03390721, 0.02810572, -0.14394265, 0.04215361, -0.3206118, 0.15034916, -0.0028448137, 0.1682989, -0.042686664, 0.020543462, -0.2786501, -0.007482015, -0.040313292, -0.20745736), target1);
	target1 = MulAdd(ne2, MF4x4(0.05417556, 0.18728684, -0.046121832, -0.27939513, 0.05907976, -0.09191223, -0.16625418, -0.26038164, 0.39956605, -0.052594025, -0.0596556, 0.29517552, -0.015181923, -0.0763375, 0.25131205, 0.13038464), target1);
	target1 = MulAdd(nf2, MF4x4(-0.036903054, -0.0066989153, -0.062650286, 0.05614359, -0.0064960583, 0.028512698, -0.10906273, -0.010047654, 0.23030473, 0.049983572, 0.10439064, 0.26643834, 0.05041243, 0.09185424, -0.32352915, 0.11295159), target1);
	target1 = MulAdd(ng2, MF4x4(0.09724027, -0.34962535, 0.06586686, 0.016635379, 0.13831381, 0.01707076, -0.04690347, 0.022350075, 0.018352794, 0.022000022, 0.070613205, 0.117735535, -0.025971051, 0.18832101, -0.09643588, -0.08512127), target1);
	target1 = MulAdd(nh2, MF4x4(-0.17324433, 0.06810613, -0.057295907, -0.05115964, -0.101570815, 0.12491774, 0.08762367, -0.005862404, -0.05342927, -0.031942457, -0.039624047, -0.04298937, -0.1303138, -0.11869282, -0.024832053, 0.070463404), target1);
	target1 = MulAdd(ni2, MF4x4(-0.010514842, 0.1376259, -0.11750346, -0.03786737, 0.03459249, 0.015408171, -0.031430878, -0.060825355, -0.072958425, -0.0037895301, 0.041686177, -0.12352204, -0.06261361, 0.054514423, -0.34072715, 0.13860728), target1);
	
	MF4 target2 = { 0.022609137, -0.028548084, 0.024431901, 0.010504478 };
	target2 = MulAdd(a1, MF4x4(-0.040142782, 0.0288423, 0.07569487, -0.01490842, 0.14402796, -0.13682005, 0.027765118, 0.03907358, 0.07117706, 0.058157545, -0.23862502, -0.057674367, -0.19220531, 0.0147159435, -0.18028538, 0.0963821), target2);
	target2 = MulAdd(b1, MF4x4(-0.1676744, -0.11937339, 0.12137117, 0.07119485, 0.14148116, -0.043578617, -0.029261118, -0.0016938087, -0.057269357, -0.080076694, 0.12193026, 0.07326153, -0.056278303, -0.01630716, -0.03792076, 0.1483611), target2);
	target2 = MulAdd(c1, MF4x4(-0.3021578, 0.011601693, 0.11266048, 0.19086999, -0.0122412145, 0.08431291, 0.11615175, -0.008039614, -0.39987534, 0.07820729, 0.03509667, 0.1963505, -0.08839513, -0.21571854, 0.059425723, -0.06830175), target2);
	target2 = MulAdd(d1, MF4x4(0.23135209, -0.12452708, 0.0943565, 0.0028859286, -0.09836373, 0.10681712, -0.3535964, 0.08457615, 0.045332734, 0.16579892, -0.03809797, -0.021596594, 0.2937497, -0.028294371, 0.046484597, -0.037604347), target2);
	target2 = MulAdd(e1, MF4x4(0.072675414, -0.16431206, 0.28952035, 0.0076831076, -0.020242939, 0.029483542, -0.092415355, 0.08673106, 0.12109694, 0.14307201, 0.23134442, 0.11731775, 0.09981601, -0.16968462, 0.037470713, 0.14948717), target2);
	target2 = MulAdd(f1, MF4x4(0.0029752052, 0.06526503, 0.1866458, 0.07451277, -0.31836876, 0.17115082, -0.13969697, 0.23844297, -0.03244903, -0.08832665, 0.023691226, -0.18230624, -0.074933805, -0.00044301842, 0.050572682, 0.081511915), target2);
	target2 = MulAdd(g1, MF4x4(0.039502528, 0.051221415, -0.13968123, -0.091212444, -0.016925618, 0.15409444, -0.017455677, -0.11653652, 0.03539446, -0.00087720866, -0.12839639, 0.037198763, 0.03674469, -0.26444665, 0.019721227, -0.13013805), target2);
	target2 = MulAdd(h1, MF4x4(0.039229527, 0.25667152, 0.0032586441, -0.00718359, 0.1617932, 0.10409968, 0.07182867, -0.09810605, 0.07789241, -0.02014911, 0.025767172, -0.14604759, 0.07175764, 0.32513744, -0.20473222, -0.16266066), target2);
	target2 = MulAdd(i1, MF4x4(0.13418433, 0.061813723, -0.13927278, -0.2498272, 0.03468218, 0.29483125, 0.063289374, -0.04726235, 0.1898295, -0.33132064, 0.032045014, 0.02159535, -0.1148363, 0.31306976, 0.06456038, 0.048988886), target2);
	target2 = MulAdd(a2, MF4x4(0.07151646, 0.2799246, -0.107190795, -0.16431166, -0.28007045, 0.07206954, 0.06775463, 0.009758042, 0.07032184, -0.20843789, 0.087045245, 0.1360676, -0.25718534, 0.028249472, -0.12614648, 0.009949602), target2);
	target2 = MulAdd(b2, MF4x4(0.020241471, -0.23390484, -0.0083223935, 0.08344701, 0.08222297, 0.12026539, -0.08652223, -0.08228822, -0.039576706, -0.24677879, -0.1157289, 0.2590508, -0.23809408, 0.19911982, -0.116798095, -0.035870325), target2);
	target2 = MulAdd(c2, MF4x4(0.024991842, 0.050509237, -0.024134455, -0.12659028, 0.24089767, 0.122712664, -0.10482493, -0.19403952, -0.19177693, -0.06538376, -0.041478425, 0.32176673, -0.1534002, -0.18680622, 0.06763643, 0.020806564), target2);
	target2 = MulAdd(d2, MF4x4(0.03437814, -0.28067374, 0.2830681, 0.038812317, -0.021698112, -0.120865285, 0.22695538, -0.045419116, -0.030475847, -0.01977341, -0.1265364, -0.3109814, 0.012255813, 0.053917278, -0.018620957, -0.14599285), target2);
	target2 = MulAdd(e2, MF4x4(-0.016204128, -0.04093018, 0.054571863, 0.02679643, 0.01756274, -0.057685968, 0.16148666, 0.17370272, -0.11065411, 0.06378157, -0.09331551, 0.22985275, 0.057905316, 0.12323568, 0.07748665, 0.09878629), target2);
	target2 = MulAdd(f2, MF4x4(-0.018112244, 0.063234635, -0.013184602, 0.16241394, 0.08877139, 0.02145378, -0.02490027, -0.038920373, 0.13127136, 0.14391647, 0.020553736, 0.14401346, 0.06685973, -0.25398204, 0.10369067, -0.055949755), target2);
	target2 = MulAdd(g2, MF4x4(0.07710333, 0.047412727, 0.13813803, 0.18624061, 0.16907091, -0.039532468, 0.06234584, 0.06408178, -0.054543987, -0.045220226, -0.11093376, -0.37399602, 0.20372874, 0.004580967, -0.07742308, 0.017989937), target2);
	target2 = MulAdd(h2, MF4x4(0.003485311, -0.08897399, -0.013108594, -0.19473282, -0.27081844, -0.16812073, 0.0052992934, -0.055331517, 0.09446357, 0.019280333, 0.16560757, -0.3230032, 0.043096773, 0.059222896, -0.064184934, -0.059852477), target2);
	target2 = MulAdd(i2, MF4x4(0.06794279, -0.034135245, 0.083064295, 0.13506731, 0.13064219, -0.44978833, -0.03513717, 0.08999715, 0.1124541, 0.42208397, -0.0038724816, -0.014332087, -0.13751853, -0.04929869, 0.09134992, -0.17687531), target2);
	target2 = MulAdd(na1, MF4x4(0.100909084, -0.0131197255, 0.082274795, -0.2138443, -0.08515947, -0.021058358, 0.10951775, -0.06349191, -0.29129833, -0.029262653, 0.25235432, -0.11748315, 0.121980384, 0.062347785, 0.10916932, -0.15993518), target2);
	target2 = MulAdd(nb1, MF4x4(0.28893283, -0.05677308, -0.2641288, -0.058937225, -0.16187571, 0.006647366, -0.063294955, 0.04766719, 0.60601914, -0.07831864, -0.15710756, -0.011491797, 0.15587467, -0.08105375, 0.07847514, -0.2803333), target2);
	target2 = MulAdd(nc1, MF4x4(-0.077989794, -0.09871811, -0.3516344, 0.15292728, 0.010889273, 0.0011189661, -0.16118282, -0.018821161, -0.039708678, -0.00060983415, -0.06367813, 0.009148068, 0.03919827, 0.18782744, 0.028040757, -0.10230145), target2);
	target2 = MulAdd(nd1, MF4x4(-0.4079609, 0.18640275, -0.12475227, 0.13891742, 0.25121725, 0.16942379, 0.14409852, 0.087600805, 0.045335658, -0.12683709, -0.0077387216, 0.06563413, -0.19857128, 0.106910795, -0.048285246, 0.10768945), target2);
	target2 = MulAdd(ne1, MF4x4(0.5989075, 0.20941062, -0.20086494, 0.13344856, 0.073034994, 0.22358665, 0.101664364, -0.13463663, 0.18816395, -0.061176624, -0.14712185, 0.027320342, -0.09529667, 0.031148786, -0.28744993, 0.18698911), target2);
	target2 = MulAdd(nf1, MF4x4(0.14799193, 0.39471942, -0.23340325, -0.4031061, 0.18926248, -0.11091216, 0.118981816, -0.09155061, 0.17049436, 0.19803695, -0.1513267, 0.023817873, 0.0090933135, -0.04134864, 0.060486555, 0.03536634), target2);
	target2 = MulAdd(ng1, MF4x4(-0.39094314, 0.01779997, 0.12710269, 0.0067333193, -0.31255835, -0.08206612, -0.048528638, 0.369439, -0.19351655, -0.03420455, 0.15831526, -0.052294146, -0.08481741, 0.0787108, 0.1312136, -0.108919285), target2);
	target2 = MulAdd(nh1, MF4x4(-0.16068119, -0.42190582, 0.19383872, -0.018445708, 0.09803051, -0.020769652, -0.022599563, -0.052448895, -0.20645833, -0.031432863, 0.0025441595, 0.03410379, -0.20268854, 0.04481527, 0.05191063, 0.42317194), target2);
	target2 = MulAdd(ni1, MF4x4(-0.12786235, -0.23936178, 0.116561726, 0.30756372, -0.09420156, -0.044529166, -0.03585749, 0.1829332, -0.23939075, 0.24030831, 0.019878127, -0.015069802, 0.24300557, -0.22558568, -0.104956664, -0.09393648), target2);
	target2 = MulAdd(na2, MF4x4(-0.04607054, 0.012677649, -0.027597688, 0.1618836, 0.29210827, 0.014221155, -0.13591036, -0.06895336, -0.09559534, 0.07956421, -0.11112994, -0.13325493, 0.24562472, 0.11046177, 0.057847694, 0.0016315983), target2);
	target2 = MulAdd(nb2, MF4x4(-0.03365951, 0.027391057, 0.09653403, -0.14718771, -0.049631152, -0.06467214, -0.058545876, 0.1424002, -0.06320376, 0.181183, 0.10249362, -0.16052136, 0.3013475, -0.04156266, 0.08862033, 0.06888033), target2);
	target2 = MulAdd(nc2, MF4x4(0.10045977, -0.004198456, -0.025856055, 0.05739418, -0.1328637, -0.025975171, 0.06553717, 0.11301186, 0.0704087, -0.083569765, 0.16066101, -0.24453588, 0.25370175, 0.037184533, 0.062386766, -0.20025635), target2);
	target2 = MulAdd(nd2, MF4x4(-0.017958941, 0.06417776, -0.1525265, 0.12451173, 0.14567685, -0.0049682115, -0.23973411, -0.0783304, -0.010629432, 0.08055161, 0.2028341, 0.17640644, -0.20445108, -0.055524793, -0.019326134, 0.081288636), target2);
	target2 = MulAdd(ne2, MF4x4(0.007882519, -0.03722546, 0.053249408, 0.00071846246, -0.07053029, -0.21583866, 0.1415364, -0.19486657, 0.20685542, 0.17660026, -0.32156837, 0.1746825, -0.14957622, -0.09224378, -0.098153435, -0.13054638), target2);
	target2 = MulAdd(nf2, MF4x4(0.10051427, -0.17398237, 0.09842799, -0.14187703, 0.116901085, -0.1229543, -0.0007776771, -0.20410055, -0.11373484, -0.111150615, -0.1974002, -0.11641459, 0.024105398, 0.24985977, 0.015871854, -0.10724633), target2);
	target2 = MulAdd(ng2, MF4x4(-0.18081793, 0.1209351, -0.12867971, -0.019415248, 0.062617876, -0.037130393, -0.07803658, -0.22862352, 0.2586428, -0.030090366, -0.11894069, 0.18087515, -0.40921417, 0.070013195, 0.030540073, 0.035120826), target2);
	target2 = MulAdd(nh2, MF4x4(-0.13185939, 0.12992652, 0.08125049, 0.075331174, 0.064219765, 0.056629725, -0.020012032, -0.0855444, -0.044063166, -0.05396545, -0.028002812, 0.21837157, -0.15206428, -0.12681007, 0.14895032, 0.12339962), target2);
	target2 = MulAdd(ni2, MF4x4(0.08066341, -0.14773634, -0.0212227, -0.014011867, -0.048505764, 0.075407125, -0.020620076, 0.0003291325, -0.21815202, -0.23136546, 0.10853532, -0.036058456, 0.10952532, -0.052677035, -0.13005799, 0.18398996), target2);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
}


//!PASS 3
//!DESC Conv-4x3x3x16
//!IN tex3, tex4
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass3(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.022956269, 0.029688787, -0.070148066, -0.07163476 };
	target1 = MulAdd(a1 , MF4x4(-0.069641694, 0.104958326, 0.14786446, 0.027633663, -0.004279524, -0.020451711, 0.0883571, -0.016224537, 0.13585235, 0.11078269, 0.20198658, -0.042161036, 0.020466218, 0.20994963, 0.20072585, -0.028024657), target1);
	target1 = MulAdd(b1 , MF4x4(0.050872434, 0.12874635, 0.1298729, 0.115810685, 0.07087254, 0.09885682, 0.23018982, 0.19187538, 0.10953604, 0.0033836907, -0.13325337, 0.09830315, -0.06528767, 0.05096927, -0.016355392, -0.039334368), target1);
	target1 = MulAdd(c1 , MF4x4(0.027010268, 0.018263958, 0.0360758, 0.016791478, 0.2815702, 0.15517488, 0.43415815, 0.044976447, -0.0070842914, -0.12546758, 0.16874593, 0.077622116, 0.02252915, 0.1769774, 0.07181055, -0.1512869), target1);
	target1 = MulAdd(d1 , MF4x4(0.057129618, 0.118046716, 0.07237424, -0.07842637, -0.044214778, -0.12886304, 0.08603301, -0.10416606, -0.15852053, 0.3788151, 0.26181692, -0.09092249, 0.31635332, 0.064212754, 0.21923725, 0.07500004), target1);
	target1 = MulAdd(e1 , MF4x4(-0.16981383, 0.044409662, -0.3717617, -0.031610407, 0.03658662, -0.09459229, -0.09449437, -0.014000666, -0.19656453, 0.03934163, -0.16304104, -0.12761801, -0.06235523, 0.16438273, -0.036933117, -0.095564745), target1);
	target1 = MulAdd(f1 , MF4x4(0.09725091, 0.034022827, 0.17699842, 0.1079676, -0.13236652, 0.03718181, -0.06968635, -0.23288171, 0.10275666, 0.08464966, -0.37162134, -0.35782215, -0.11023659, 0.2519236, -0.035197742, -0.019324787), target1);
	target1 = MulAdd(g1 , MF4x4(-0.09968464, 0.01102193, 0.0073735216, 0.011999313, -0.004998707, 0.09518938, 0.045727003, -0.21544908, 0.006879454, -0.06398254, -0.12584935, -0.06759933, -0.0820037, -0.07775104, 0.021957919, -0.122708224), target1);
	target1 = MulAdd(h1 , MF4x4(-0.08869767, 0.031296413, -0.0034280645, 0.13778855, 0.10073061, -0.08393937, -0.032959275, -0.0500518, 0.010908757, -0.09189417, -0.057760105, 0.17652664, -0.08729078, -0.09639096, -0.25654703, 0.055152636), target1);
	target1 = MulAdd(i1 , MF4x4(0.0027847723, -0.12885433, 0.038065907, 0.17450769, 0.0864409, 0.04592345, -0.015443841, 0.077010944, 0.08967368, 0.06800111, -0.23636387, 0.35023567, 0.03165923, 0.03132063, 0.17964344, 0.035610788), target1);
	target1 = MulAdd(a2 , MF4x4(-0.032017227, -0.0022808525, -0.08470573, 0.05332408, -0.14674746, 0.025374275, -0.018281924, 0.041163016, 0.00096549373, 0.014724006, 0.004913065, 0.18494442, 0.034953076, -0.15731992, -0.13792977, 0.08041999), target1);
	target1 = MulAdd(b2 , MF4x4(0.08305006, 8.6318905e-05, -0.007895379, 0.02731387, -0.061324496, 0.050034665, 0.22662131, -0.013876427, -0.074468784, -0.008136604, -0.23337875, -0.1742574, 0.011753501, -0.11666686, -0.22541048, -0.14549944), target1);
	target1 = MulAdd(c2 , MF4x4(-0.028333234, 0.121047184, 0.06720256, -0.058930036, 0.030258363, 0.07292774, 0.06455556, 0.0019076486, 0.0073987027, 0.17144889, 0.06084024, -0.08762086, -0.114422195, -0.16595861, -0.08706028, -0.10736261), target1);
	target1 = MulAdd(d2 , MF4x4(-0.02519315, -0.14611271, 0.0388848, 0.19481422, -0.05970354, -0.08391417, 0.18982239, -0.10447052, 0.15587378, -0.023997072, 0.0781739, 0.2182389, -0.023886079, -0.1422596, -0.13352804, 0.005008043), target1);
	target1 = MulAdd(e2 , MF4x4(0.08842712, -0.100292705, 0.18925671, 0.12198875, 0.061771665, -0.04473232, 0.025053164, 0.039047796, -0.1672479, -0.08934517, 0.33099812, -0.20269585, -0.21640155, -0.22029749, 0.16539703, -0.2442679), target1);
	target1 = MulAdd(f2 , MF4x4(-0.16332205, -0.101898365, 0.02919932, -0.11900455, 0.14442924, 0.0916815, 0.037550304, 0.024123482, 0.02042624, 0.033472955, -0.059437107, -0.18735693, -0.013749093, -0.06199881, -0.08685079, 0.04252364), target1);
	target1 = MulAdd(g2 , MF4x4(-0.09047013, -0.055188328, -0.09106191, -0.048969727, 0.05114009, -0.12753403, 0.07116141, 0.060749624, -0.074034564, -0.21952136, -0.09479503, 0.2753584, -0.014141759, -0.14883812, -0.0673838, -0.012279045), target1);
	target1 = MulAdd(h2 , MF4x4(0.013816464, -0.0747162, -0.19202435, -0.064403646, 0.34980014, 0.04375546, 0.20264609, 0.006684355, 0.11523799, 0.024674915, -0.08697566, -0.04662527, -0.12743855, -0.39463726, 0.0057380227, 0.01286557), target1);
	target1 = MulAdd(i2 , MF4x4(-0.08146522, 0.074080914, -0.16856177, -0.183158, 0.19228102, 0.12373886, 0.017574452, -0.01753772, 0.045071773, 0.07725093, 0.023422163, -0.011545186, 0.20751388, -0.10795588, 0.07606346, 0.10282933), target1);
	target1 = MulAdd(na1 , MF4x4(0.12512013, -0.102208994, -0.09125398, 0.12043188, -0.066011876, 0.08831903, -0.017038671, -0.005541508, -0.049607087, 0.08654939, -0.02037085, 0.26887566, 0.005012545, 0.01869507, -0.013064982, -0.010649147), target1);
	target1 = MulAdd(nb1 , MF4x4(0.006824864, -0.05071593, -0.20786697, -0.07327317, 0.011382597, 0.030494886, -0.04754353, -0.018284699, 0.01305972, -0.036589053, 0.26637617, 0.021887446, -0.026669119, -0.037982125, -0.063445956, -0.009104248), target1);
	target1 = MulAdd(nc1 , MF4x4(0.032602567, 0.07094331, 0.052653246, 0.08342047, -0.085082285, -0.14674088, -0.23073354, -0.07915851, 0.0017120204, 0.032407638, -0.039819505, 0.16942178, 0.023192152, -0.0353237, 0.10930186, 0.22939779), target1);
	target1 = MulAdd(nd1 , MF4x4(0.0010455973, -0.11821993, -0.12639599, 0.12250084, -0.12756817, 0.11478416, -0.1862587, 0.016819192, 0.02110181, -0.25492984, -0.1766048, 0.22188173, -0.21305011, 0.113442205, 0.04599144, -0.15840286), target1);
	target1 = MulAdd(ne1 , MF4x4(-0.15086032, -0.17428935, 0.39080557, 0.07576757, 0.121703945, 0.17944208, -0.003140103, -0.11231332, 0.12102969, 0.15310267, 0.17578171, 0.40631834, -0.21299168, 0.024928993, 0.030104794, 0.020753227), target1);
	target1 = MulAdd(nf1 , MF4x4(-0.098734386, -0.020072265, -0.14308836, -0.08490801, 0.017175158, 0.02250534, 0.04060829, 0.033022214, 0.0046218676, 0.17923212, 0.0112105915, 0.09574084, 0.14819936, -0.14692923, 0.12634254, 0.060762513), target1);
	target1 = MulAdd(ng1 , MF4x4(0.030521613, -0.097913325, -0.016720278, 0.11273997, 0.013019863, -0.06557118, 0.0405774, 0.0915019, 0.022414956, -0.053254984, 0.18639986, 0.07820968, 0.06498986, 0.058922634, -0.02240318, -0.086019725), target1);
	target1 = MulAdd(nh1 , MF4x4(0.2058775, 0.01502064, 0.05847032, 0.007249146, 0.086483665, 0.19420148, 0.03892261, -0.013546935, -0.07980237, 0.04347281, -0.10376214, -0.1366535, 0.05285337, 0.07213318, 0.3642818, -0.11331124), target1);
	target1 = MulAdd(ni1 , MF4x4(-0.025740806, 0.14551482, -0.037410017, -0.17477523, -0.11853099, -0.060820814, -0.102599286, -0.13267937, -0.103053465, -0.014044828, -0.01888072, -0.06499249, 0.22311528, -0.051850274, -0.034120858, 0.044562567), target1);
	target1 = MulAdd(na2 , MF4x4(-0.21360217, 0.10093803, -0.0016407765, -0.1473997, 0.26524043, 0.02112132, 0.23173104, -0.013157391, 0.05945182, 0.044635538, 0.06031638, -0.21435826, -0.10147484, 0.069090195, 0.09641844, -0.09581093), target1);
	target1 = MulAdd(nb2 , MF4x4(-0.08576515, -0.122861005, 0.049567085, -0.085854456, 0.23809357, -0.024966082, -0.10294079, 0.046241313, 0.008621132, -0.08323767, 0.20277941, 0.163423, -0.07386535, -0.088738985, 0.05274358, -0.025479877), target1);
	target1 = MulAdd(nc2 , MF4x4(-0.041135542, -0.008365642, 0.17088248, 0.04025207, 0.13809255, -0.056895368, -0.01582834, 0.07361908, -0.00068995473, -0.09300962, 0.19117641, 0.24832036, -0.06572358, -0.026025, -0.019093119, -0.049720034), target1);
	target1 = MulAdd(nd2 , MF4x4(0.024900286, 0.11525501, 0.025882801, 0.037742402, 0.36976853, 0.052211333, -0.15143296, 0.1802276, -0.059080046, 0.017990451, 0.026395092, -0.12689115, -0.07705386, 0.1232379, 0.13273561, -0.12521964), target1);
	target1 = MulAdd(ne2 , MF4x4(-0.19788785, 0.044887315, 0.07663442, 0.16688696, -0.2842248, -0.15684547, 0.028387763, 0.0063470444, -0.012245601, -0.038382255, -0.8187406, -0.25245667, 0.23014604, 0.22746666, 0.1594356, 0.16469443), target1);
	target1 = MulAdd(nf2 , MF4x4(-0.12663333, 0.014730006, 0.03765697, 0.15704912, -0.106595434, -0.05317512, -0.081759915, -0.08797109, 0.064620756, -0.06341419, 0.16493447, 0.23102313, 0.068325415, -0.088058695, 0.16885915, 0.036382258), target1);
	target1 = MulAdd(ng2 , MF4x4(0.035389822, -0.11811836, -0.035656307, -0.0680554, 0.1338908, 0.065852076, 0.023307983, 0.0675308, 0.09690683, 0.18170924, 0.09862692, -0.20964378, -0.08601271, -0.20016764, -0.01879598, -0.14629345), target1);
	target1 = MulAdd(nh2 , MF4x4(-0.27183273, 0.013525998, -0.14995874, -0.23938845, -0.26218823, -0.0009874097, -0.13385512, -0.10664239, -0.048931994, 0.039898522, 0.047444753, 0.10934722, 0.10969629, 0.123539805, 0.11692802, 0.14172275), target1);
	target1 = MulAdd(ni2 , MF4x4(-0.1656506, 0.019683002, 0.0221048, 0.12596753, 0.20420644, -0.07930122, 0.04653823, 0.11492255, -0.0050175437, -0.03271697, 0.013389486, 0.034583613, -0.2196601, -0.1615663, -0.013763388, -0.056037936), target1);

	MF4 target2 = { 0.103826486, 0.045373913, 0.11565896, -0.06568643 };
	target2 = MulAdd(a1 , MF4x4(-0.15104648, 0.05522861, -0.0654341, -0.053517453, -0.08264124, -0.0062249107, -0.20364265, -0.05015117, -0.18837251, 0.030655831, 0.046844713, -0.20673253, -0.14042036, -0.05655449, 0.13994302, 0.011745607), target2);
	target2 = MulAdd(b1 , MF4x4(-0.16517559, 0.1489214, -0.09149559, 0.025003506, -0.124926426, 0.16974348, -0.020857265, 0.08017403, 0.21836148, 0.0025619378, 0.2331612, 0.085599184, -0.030934382, -0.055194855, 0.09527726, -0.10081552), target2);
	target2 = MulAdd(c1 , MF4x4(0.041800212, 0.028859638, 0.09395546, 0.05211183, -0.038541477, 0.021495212, 0.04862346, -0.007864793, 0.038407274, -0.13841268, -0.14963801, 0.26470762, 0.16691841, -0.07262008, 0.034374326, -0.14709206), target2);
	target2 = MulAdd(d1 , MF4x4(0.00094978884, -0.028974704, -0.0900548, -0.08401967, -0.08935931, -0.043606587, -0.14497143, -0.05226239, -0.21516493, 0.19410603, -0.089924194, -0.04335071, -0.012618276, -0.2671613, 0.020422975, -0.037739716), target2);
	target2 = MulAdd(e1 , MF4x4(-0.13403237, -0.02524383, -0.03474901, 0.054432765, 0.11946775, 0.107336655, -0.1431715, -0.13370377, 0.015087512, -0.1917613, 0.073493585, 0.2788855, -0.010510839, 0.06891479, -0.06741307, -0.05271205), target2);
	target2 = MulAdd(f1 , MF4x4(-0.15432046, 0.04021662, -0.16979513, 0.13660534, -0.10518303, -0.10095502, -0.13092068, 0.022805348, -0.16676381, -0.4273298, 0.020867536, 0.3506733, -0.29459694, -0.055828743, -0.069241956, 0.04106382), target2);
	target2 = MulAdd(g1 , MF4x4(-0.08890133, 0.07549666, -0.040735144, -0.1506932, -0.22227979, -0.0762723, -0.17766447, -0.05741318, -0.21885683, 0.2379157, -0.15525854, -0.07306285, 0.15580738, -0.04394069, -0.19175608, 0.018283797), target2);
	target2 = MulAdd(h1 , MF4x4(-0.08503275, -0.105500385, -0.114987396, -0.07166016, -0.2147138, 0.09378708, 0.24550334, -0.0834075, -0.033147786, -0.022304727, -0.31062204, 0.027651973, 0.109098755, 0.18889032, 0.1163026, 0.13863255), target2);
	target2 = MulAdd(i1 , MF4x4(0.15266588, -0.14901319, 0.033916786, 0.09381096, -0.08196443, -0.16194504, 0.035789456, 0.21234898, -0.48724765, 0.2619442, -0.11215393, 0.25061038, 0.022344576, 0.0116525125, 0.111661114, -0.15242295), target2);
	target2 = MulAdd(a2 , MF4x4(0.020475458, 0.0797404, -0.13576819, 0.009681671, 0.030504882, 0.049232908, 0.022025917, 0.16912088, -0.23914136, -0.084663324, 0.020925451, -0.1023938, 0.035916872, -0.07538111, -0.11470242, 0.15238516), target2);
	target2 = MulAdd(b2 , MF4x4(-0.12941381, 0.08509899, -0.029489802, -0.09148447, -0.089406274, -0.116145454, -0.08979843, 0.11908148, 0.15473351, -0.21687616, 0.12607013, -0.08244334, -0.079580925, -0.16613089, -0.09287793, -0.03412643), target2);
	target2 = MulAdd(c2 , MF4x4(-0.023578499, 0.07394217, -0.13069086, -0.1060499, -0.07559958, -0.21839201, 0.1090753, 0.0787872, 0.07677037, -0.25998843, 0.20039314, 0.046882212, 0.31871012, -0.3048051, 0.15118991, -0.00518087), target2);
	target2 = MulAdd(d2 , MF4x4(-0.15338503, -0.11057532, 0.075839415, -0.18592294, -0.0155324, 0.038140323, -0.10498194, 0.09070477, 0.05108992, -0.047939524, -0.091004305, 0.09649005, -0.10967152, -0.051909525, -0.05314551, 0.09661584), target2);
	target2 = MulAdd(e2 , MF4x4(-0.14458802, -0.053263694, -0.0010885567, 0.23342133, 0.01918937, 0.12026143, -0.15691495, 0.30480555, -0.08725869, 0.19082253, 0.3594973, 0.016653897, 0.045152336, -0.088590585, 0.0069655925, 0.1392425), target2);
	target2 = MulAdd(f2 , MF4x4(0.17944881, -0.17950764, 0.13282645, 0.030974053, 0.32233685, 0.18067117, -0.11472813, 0.097301506, -0.047649745, -0.1053861, -0.081039384, 0.035132434, 0.10204545, 0.085582554, -0.13153993, -0.021741152), target2);
	target2 = MulAdd(g2 , MF4x4(-0.15573682, 0.16409989, -0.22574787, -0.03877603, -0.18285516, 0.11638645, 0.18321282, -0.017770218, 0.18230622, 0.16433364, -0.12795393, -0.03805153, 0.14386104, -0.0891527, -0.056928284, -0.10961495), target2);
	target2 = MulAdd(h2 , MF4x4(0.257622, 0.052519716, -0.25421762, -0.1887382, -0.083568096, -0.0064690276, -0.029110614, 0.103327505, -0.17006217, 0.2254096, -0.29366904, 0.04302887, -0.10198446, -0.24423616, 0.16781262, -0.005019004), target2);
	target2 = MulAdd(i2 , MF4x4(0.103393994, -0.059044626, -0.18192382, 0.0990813, -0.26143607, 0.11036474, 0.04788275, -0.096738026, 0.12825653, 0.13631694, -0.077904984, -0.020790676, -0.25118098, 0.122588515, -0.049440473, -0.10758222), target2);
	target2 = MulAdd(na1 , MF4x4(0.06693113, -0.13647175, 0.131139, 0.13143918, 0.081720434, 0.117537096, 0.15387627, -0.008771362, 0.08513583, 0.023794742, -0.0661625, 0.115793936, 0.0023350024, 0.02215075, -0.0494433, -0.013404977), target2);
	target2 = MulAdd(nb1 , MF4x4(0.041419264, -0.17622781, 0.028418267, 0.12114493, -0.23587078, 0.08457395, 0.014364018, -0.103271864, -0.051572207, -0.026424447, 0.16755055, -0.10763651, -0.033440586, 0.068594255, -0.050668504, 0.1941505), target2);
	target2 = MulAdd(nc1 , MF4x4(-0.2780181, 0.037816502, -0.11516711, -0.09822884, 0.13762361, -0.14317706, 0.14350282, 0.000623895, -0.08601606, 0.08118504, 0.15497385, -0.04721711, -0.008936935, -0.014223618, -0.09641698, -0.013884213), target2);
	target2 = MulAdd(nd1 , MF4x4(0.14349665, -0.03144472, -0.057813704, 0.0667044, 0.09026094, 0.051366236, 0.11139983, -0.015782114, -0.18314016, -0.18774192, 0.0014838242, 0.15759028, 0.062388215, 0.13626057, 0.02576217, -0.06317815), target2);
	target2 = MulAdd(ne1 , MF4x4(0.07151769, 0.14508991, 0.1736844, -0.11487795, -0.07999805, -0.07797908, 0.037923355, -0.059138823, -0.23531209, -0.040207293, -0.068355694, -0.024296658, -0.114820175, 0.19726487, 0.21772414, 0.03659222), target2);
	target2 = MulAdd(nf1 , MF4x4(0.16858695, -0.12135113, 0.009391182, -0.081519485, 0.13340487, 0.07007004, 0.094124354, 0.035519842, -0.3320139, -0.06624027, -0.14716229, -0.09205287, 0.12664132, -0.05655441, 0.0123263765, 0.04641279), target2);
	target2 = MulAdd(ng1 , MF4x4(0.19018422, -0.15428329, -0.009354114, 0.04165953, 0.11024837, -0.107493006, -0.05807292, -0.048029456, 0.24319384, -0.10542357, -0.013699952, 0.06228662, -0.06808749, -0.023227982, 0.16528323, -0.05610251), target2);
	target2 = MulAdd(nh1 , MF4x4(-0.008616222, 0.077674195, -0.08638503, 0.09293109, 0.072474636, 0.05004233, -0.20591061, -0.005301386, -0.15486047, 0.15038474, 0.1262478, 0.021724822, 0.02274613, -0.3088281, -0.08437887, -0.10684698), target2);
	target2 = MulAdd(ni1 , MF4x4(-0.16960032, 0.09365251, -0.030414175, -0.010766254, 0.18181023, 0.12130318, 0.08913089, -0.06070321, 0.05200306, 0.092584535, 0.17694671, 0.033796314, -0.038107123, -0.04335955, -0.049443472, 0.30465958), target2);
	target2 = MulAdd(na2 , MF4x4(0.07661484, -0.009945252, 0.12866217, -0.07592757, -0.21030053, 0.014371748, -0.072458774, -0.04700072, 0.15534303, 0.2007125, -0.15699059, -0.032897495, 0.08110436, -0.11243608, 0.008632577, -0.10153441), target2);
	target2 = MulAdd(nb2 , MF4x4(-0.034697928, 0.06928288, -0.2796273, 0.14405379, 0.12248569, 0.036539096, 0.06607706, 0.077684596, -0.16473202, 0.1665916, -0.29977503, 0.21047153, 0.13114224, -0.091579035, -0.045458574, 0.03254245), target2);
	target2 = MulAdd(nc2 , MF4x4(0.053284872, 0.053366095, -0.26152626, -0.03123967, -0.031794485, 0.17670582, -0.07450994, 0.017521491, -0.040290453, 0.38342363, -0.25021288, -0.014660264, 0.1621895, 0.25041878, -0.12124821, 0.068036206), target2);
	target2 = MulAdd(nd2 , MF4x4(0.11366693, -0.030863572, -0.07411263, 0.12475283, -0.046070684, -0.09033321, 0.013222701, 0.06798592, -0.32814804, 0.057653826, -0.14082801, -0.00217398, -0.22856179, -0.19058353, -0.20992154, -0.03701372), target2);
	target2 = MulAdd(ne2 , MF4x4(0.20345633, -0.1332355, 0.27152926, -0.13477845, -0.25242096, -0.28281286, 0.31289554, 0.14284514, 0.53362453, -0.46766588, 0.4518293, -0.39291728, -0.3573227, -0.014670052, 0.0051881406, 0.16552156), target2);
	target2 = MulAdd(nf2 , MF4x4(-0.15017267, -0.07792945, -0.204405, 0.13964304, -0.13642666, -0.10228306, 0.03238279, -0.08689329, -0.072262034, -0.0258388, 0.05689183, 0.055701543, -0.19800112, 0.012217054, -0.033292748, -0.047611095), target2);
	target2 = MulAdd(ng2 , MF4x4(-0.014704416, -0.12203891, 0.066083655, -0.1409769, 0.0041513643, -0.087383606, -0.17498164, 0.11327789, -0.25947225, -0.0016027623, 0.08202566, 0.042270098, 0.006429511, -0.26576808, -0.08461341, 0.049376782), target2);
	target2 = MulAdd(nh2 , MF4x4(0.0695189, -0.14753938, 0.09578246, -0.16607563, -0.0105561055, 0.17166016, 0.027422488, -0.14175262, -0.009492696, -0.23449713, 0.018270867, 0.14635146, 0.33451268, 0.030959005, -0.46468422, 0.024256868), target2);
	target2 = MulAdd(ni2 , MF4x4(-0.16865666, -0.00015881563, -0.054488145, -0.06222717, -0.032101758, 0.06485387, -0.0028512608, 0.046645947, 0.017593225, -0.19447896, -0.024742266, 0.03970127, 0.29845607, -0.16168733, 0.035172883, 0.07924657), target2);

	MF3 target3 = MulAdd(e1, MF4x3(0.09689336, 0.06046458, 0.072598994, 0.11994565, 0.104477674, 0.09302802, -0.05718302, 0.050438102, 0.08814741, 0.0308889, 0.0033925986, -0.01715605), 0.0);
	target3 = MulAdd(e2, MF4x3(-0.028314235, 0.06597744, 0.0966897, 0.035656154, 0.07770106, 0.075551905, 0.0001793458, -0.000479495, -0.00297406, -0.053916585, -0.016807461, -0.0057141334), target3);
	target3 = MulAdd(ne1, MF4x3(-0.047189303, -0.0207, -0.020910334, -0.07933196, -0.06961211, -0.086069845, 0.0943727, 0.008463375, 0.010755166, 0.062410597, 0.022625161, 0.04068433), target3);
	target3 = MulAdd(ne2, MF4x3(0.10270994, -0.019080428, 0.0050091282, -0.004672948, -0.013966742, -0.0063746064, -2.5856789e-05, 0.03151499, -0.0023983798, 0.113539025, 0.12381699, 0.100360274), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}


//!PASS 4
//!DESC Conv-4x3x3x16
//!IN tex1, tex2, tex5
//!OUT tex3, tex4, tex6
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass4(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { 0.0001705175, -0.031081453, 0.010100773, -0.027214011 };
	target1 = MulAdd(a1, MF4x4(0.1851775, 0.053705044, 0.033816848, -0.018555025, -0.21204336, -0.01706974, 0.088259794, -0.13126148, 0.10729598, -0.043457437, 0.08634712, 0.09220895, 0.062131613, -0.01995871, 0.05181067, 0.18520063), target1);
	target1 = MulAdd(b1, MF4x4(0.1662002, -0.14197104, -0.052809287, 0.025287712, -0.08330898, -0.08998097, -0.15642618, -0.14941245, -0.03481203, 0.061857622, 0.26051775, -0.0005498248, 0.086427025, 0.024108192, -0.12418039, 0.022286376), target1);
	target1 = MulAdd(c1, MF4x4(0.058200672, -0.3073398, 0.17150162, -0.13394679, -0.075118184, -0.14607768, -0.006172172, 0.007731589, -0.21818224, -0.06449433, -0.038958784, 0.037722416, 0.28699976, -0.027563032, 0.23295315, 0.028444216), target1);
	target1 = MulAdd(d1, MF4x4(0.12871371, 0.0064904913, 0.14985761, -0.10923005, 0.17413563, 0.1599109, -0.08457703, 0.108153716, -0.08871187, -0.06661137, 0.2754416, -0.009667768, 0.39819396, 0.12392097, 0.14145902, 0.0019376524), target1);
	target1 = MulAdd(e1, MF4x4(0.13893189, 0.12715353, 0.015191678, -0.21003054, -0.030412354, -0.01676613, -0.19799289, -0.006130075, 0.37676954, -0.14475077, -0.2065198, -0.30432892, -0.14944535, -0.09121536, -0.107600585, -0.24462196), target1);
	target1 = MulAdd(f1, MF4x4(-0.11653076, -0.0068671284, -0.02249137, -0.17877012, -0.15063138, -0.13514869, 0.107643366, -0.03196477, -0.086422764, 0.3079287, 0.17584166, -0.032449376, -0.06917114, -0.2682637, -0.18978168, -0.037039287), target1);
	target1 = MulAdd(g1, MF4x4(0.12014731, -0.030360512, -0.12954475, -0.110275604, -0.077214256, 0.019689744, 0.22149551, -0.002266716, 0.09697784, -0.124532826, -0.16776511, -0.034212478, -0.36935154, 0.016926935, 0.1363609, 0.20415346), target1);
	target1 = MulAdd(h1, MF4x4(-0.11199535, -0.001692563, -0.09058429, -0.08437503, 0.092625685, 0.06046257, 0.25509837, -0.011657033, -0.17949764, -0.10718947, -0.1180669, -0.24681842, -0.1747311, 0.0014518246, -0.042863015, 0.06103357), target1);
	target1 = MulAdd(i1, MF4x4(0.14979295, -0.037154514, 0.01957725, 0.012282435, 0.09168596, -0.05552286, 0.111671515, 0.0078630615, -0.10319766, -0.06416261, -0.23097566, -0.13931875, 0.2110811, 0.013095802, -0.2306504, -0.025639111), target1);
	target1 = MulAdd(a2, MF4x4(-0.10091975, -0.10095426, -0.023449723, -0.022170888, 0.054953706, -0.13049407, 0.08289061, 0.023241632, 0.08735388, -0.0058387457, 0.17897247, 0.011434436, 0.008181139, -0.0034718404, -0.015372735, -0.07657766), target1);
	target1 = MulAdd(b2, MF4x4(-0.023442164, 0.07535702, 0.024391165, -0.050532013, 0.044168636, 0.0062343236, -0.019756999, -0.009695123, 0.10102337, 0.0052776975, -0.14944167, -0.060957722, 0.24367364, -0.08069369, 0.12170072, -0.047048368), target1);
	target1 = MulAdd(c2, MF4x4(-0.18376935, -0.08407229, -0.12943378, 0.0738419, -0.12404976, -0.13367929, 0.11265896, -0.021353, 0.003783386, 0.50088304, 0.14058582, 0.041053623, 0.038247623, -0.014179976, 0.007905778, -0.042492237), target1);
	target1 = MulAdd(d2, MF4x4(-0.046272535, 0.052449115, 0.17190954, -0.004745371, -0.045572635, -0.09292636, 0.36309823, 0.16673928, -0.099154025, -0.109614775, 0.17803112, 0.19907133, -0.14306267, 0.06898593, 0.11493454, 0.06795014), target1);
	target1 = MulAdd(e2, MF4x4(0.26181114, -0.044014625, -0.21605036, -0.08646438, 0.21038742, -0.084986, 0.0504626, 0.17514943, -0.25218952, -0.18691514, 0.057650108, 0.08653614, -0.101205684, 0.03176334, 0.18569492, 0.17973189), target1);
	target1 = MulAdd(f2, MF4x4(-0.0339215, 0.20112811, -0.12986277, 0.028961731, -0.056813832, 0.04451147, -0.07827432, -0.0860976, 0.096853435, 0.3483546, -0.35758162, -0.11749375, -0.035918653, 0.06140711, -0.08520154, 0.02418808), target1);
	target1 = MulAdd(g2, MF4x4(-0.09643022, -0.10491069, 0.0068604187, 0.023679713, 0.096521445, -0.29323488, 0.33353668, 0.112864286, -0.1172182, -0.07233183, 0.06607239, 0.08589609, 0.055790007, 0.14396138, -0.14191268, 0.00034840964), target1);
	target1 = MulAdd(h2, MF4x4(0.15357164, -0.038462736, 0.08143956, 0.1744909, 0.40503287, -0.114508316, 0.003937322, 0.2536635, -0.042445306, -0.15622465, 0.09155284, 0.010992155, -0.20646071, 0.022801135, 0.08894491, 0.069300614), target1);
	target1 = MulAdd(i2, MF4x4(-0.12663515, 0.023849454, -0.053604446, 0.12082873, -0.247968, -0.020969635, -0.03831894, -0.014617553, 0.22630337, 0.037801865, 0.052950703, 0.04285706, -0.14487264, 0.20786528, -0.08719664, 0.1752347), target1);
	target1 = MulAdd(na1, MF4x4(-0.073527604, -0.050752833, 0.051830504, 0.32868716, 0.17474994, 0.016937364, -0.08792601, -0.024481766, -0.022229593, 0.030706186, 0.09213566, -0.076506205, 0.073404044, 0.10368055, -0.175889, -0.08453031), target1);
	target1 = MulAdd(nb1, MF4x4(-0.06838216, 0.007698341, 0.063972116, -0.015604406, 0.16135305, 0.18044342, 0.024137018, -0.23326185, 0.13235588, -0.009096587, -0.058368143, -0.077040404, 0.0011419816, -0.09246194, 0.061036937, 0.049564146), target1);
	target1 = MulAdd(nc1, MF4x4(0.023225296, -0.00060856267, -0.07775185, 0.016958566, -0.2641349, -0.08263046, -0.15350416, -0.30203494, 0.113956556, -0.010813236, -0.017738314, -0.13689043, -0.10318342, 0.025793184, -0.010336172, 0.09733422), target1);
	target1 = MulAdd(nd1, MF4x4(-0.04462596, 0.052866418, -0.34754288, 0.05540498, -0.24492586, -0.32016864, 0.18145293, 0.24873725, 0.32388234, -0.034801524, -0.1347588, -0.07565546, 0.015183539, 0.05059595, 0.08090056, 0.05930932), target1);
	target1 = MulAdd(ne1, MF4x4(0.045346696, -0.052527856, 0.052270077, 0.13417454, 0.05200045, 0.028119288, 0.005115497, 0.22952151, -0.2158375, 0.12241308, 0.3507457, 0.08616576, 0.07592416, 0.28470486, 0.3432788, 0.24857087), target1);
	target1 = MulAdd(nf1, MF4x4(0.21311626, 0.052607164, 0.1248861, 0.20193806, 0.045226507, 0.14512901, -0.15103437, -0.17926466, 0.11657411, -0.32711068, -0.16332194, -0.07793982, -0.21802668, 0.5183869, -0.13567342, 0.07823041), target1);
	target1 = MulAdd(ng1, MF4x4(0.00796368, 0.048073012, -0.14537893, -0.021708772, 0.036246423, 0.1062395, 0.12605369, 0.007073524, -0.1572743, 0.07439501, 0.089162275, -0.0039608316, 0.332032, -0.05461242, -0.17615359, -0.10240517), target1);
	target1 = MulAdd(nh1, MF4x4(0.20636982, -0.0024615112, -0.10625786, 0.024270926, 0.061810836, -0.13585201, -0.16581286, 0.23549418, 0.01928842, 0.07404979, -0.054449487, 0.04096373, 0.046939734, 0.003980803, 0.02111498, 0.064925276), target1);
	target1 = MulAdd(ni1, MF4x4(0.10485388, 0.06850885, -0.11292169, 0.16991565, -0.15282536, 0.124175504, -0.050431166, -0.06689582, -0.00059811946, 0.033696912, 0.11055047, 0.033060126, -0.17472714, 0.0048819613, -0.04478706, -0.1344572), target1);
	target1 = MulAdd(na2, MF4x4(-0.20473132, 0.056477875, 0.059559986, 0.115130566, -0.058425788, -0.035971727, 0.08334707, -0.096510135, -0.23206294, 0.10635798, -0.21575621, -0.07063254, 0.03877511, -0.107549034, 0.22248401, 0.21702304), target1);
	target1 = MulAdd(nb2, MF4x4(-0.02557767, 0.09886609, -0.100499466, 0.16687396, -0.084830604, 0.03150401, -0.049512494, 0.05595696, -0.13193256, -0.08585273, 0.14247662, 0.12290477, -0.07168309, 0.14531752, -0.048359327, 0.27716598), target1);
	target1 = MulAdd(nc2, MF4x4(0.13297586, 0.20674329, 0.14469388, 0.08981846, -0.004231366, -0.02819193, 0.15470329, 0.17299837, 0.113062344, -0.22716297, -0.21754944, -0.00083956274, -0.14160508, 0.1808253, 0.11268379, 0.27335623), target1);
	target1 = MulAdd(nd2, MF4x4(0.07497518, -0.06799594, -0.018158078, -0.00038999433, -0.15169668, -0.06928238, -0.33672288, -0.105485775, 0.33106267, 0.06698315, 0.019718744, -0.06810211, -0.35186404, -0.29145968, -0.056863394, 0.21498048), target1);
	target1 = MulAdd(ne2, MF4x4(-0.013215512, -0.24763754, 0.20965266, 0.1068435, -0.13234195, 0.053566497, 0.05061848, -0.28645232, 0.15518288, 0.23247199, 0.017553907, -0.25181335, -0.048030723, -0.06663929, -0.111026704, -0.12663394), target1);
	target1 = MulAdd(nf2, MF4x4(-0.010501938, -0.17995767, 0.06010859, 0.050185587, 0.108627126, -0.101203434, 0.07558728, 0.060466755, -0.106942676, -0.35854608, 0.16015992, 0.16823332, -0.06543775, -0.37310675, 0.014043972, -0.18328045), target1);
	target1 = MulAdd(ng2, MF4x4(0.09712849, 0.013983463, 0.07291423, 0.031715546, 0.030862397, 0.045510456, -0.22066842, 0.063464865, 0.11721659, -0.10596602, -0.20611264, 0.052158818, -0.3961766, -0.03781582, 0.17633812, 0.1316111), target1);
	target1 = MulAdd(nh2, MF4x4(-0.25029674, 0.07153423, -0.35125682, -0.18255402, -0.19569087, 0.00432772, -0.0969035, -0.24648514, -0.0040922165, 0.037500706, -0.038137026, 0.056214277, -0.048258524, 0.03567822, -0.05033007, -0.24696785), target1);
	target1 = MulAdd(ni2, MF4x4(-0.03465209, -0.012495964, 0.22782089, 0.012034795, 0.2916752, 0.08264436, 0.15387125, -0.1473455, -0.15614432, 0.05536727, -0.027079755, 0.010725311, -0.03325222, -0.089212805, -0.10559839, -0.19647683), target1);

	MF4 target2 = { 0.012053958, -4.6962363e-05, 0.0020099226, -0.033494607 };
	target2 = MulAdd(a1, MF4x4(-0.026301445, -0.021575214, 0.22165509, 0.059994068, 0.03341161, 0.1831188, 0.20342293, 0.110160105, 0.03908121, 0.020673111, 0.07239561, 0.038754333, 0.15266368, 0.16526422, 0.062376205, -0.09759537), target2);
	target2 = MulAdd(b1, MF4x4(0.19817191, 0.10267733, 0.17744653, 0.23283184, 0.18810122, 0.2708428, -0.12651879, 0.020756349, 0.039632563, -0.22201295, 0.04873703, 0.09159713, 0.13838065, 0.21169297, 0.30816007, 0.044463675), target2);
	target2 = MulAdd(c1, MF4x4(-0.27859214, 0.07277634, 0.0021458792, 0.0089682285, -0.069680706, 0.090415835, -0.057762265, 0.18703683, -0.03514389, -0.102816254, -0.036509827, 0.038066104, -0.0168311, 0.094478935, 0.04079697, -0.049064912), target2);
	target2 = MulAdd(d1, MF4x4(-0.20913245, -0.110538535, -0.08584027, -0.1222067, 0.05414807, -0.045247085, 0.07351766, -0.002078549, -0.1270987, -0.10164512, -0.1857815, 0.08845066, -0.03743333, -0.098948084, 0.21244387, 0.10441866), target2);
	target2 = MulAdd(e1, MF4x4(0.015990427, 0.36396438, -0.24094687, 0.30236533, -0.13271736, 0.06057376, -0.19678196, -0.28577125, -0.25427434, -0.08400598, 0.07284403, -0.18552442, -0.16425897, 0.097259276, -0.32386774, -0.2190484), target2);
	target2 = MulAdd(f1, MF4x4(-0.004581924, -0.13954072, -0.122360416, 0.14132866, -0.08529257, -0.013296556, 0.0848472, 0.09336581, 0.10332182, -0.016313016, 0.07103558, 0.032564916, -0.13478759, -0.20207484, 0.12986964, 0.1219679), target2);
	target2 = MulAdd(g1, MF4x4(0.09817874, -0.10573357, 0.100535244, 0.19608764, -0.13303067, 0.024192972, -0.030689823, 0.02574889, 0.051233094, 0.03489235, -0.18465245, -0.06943822, -0.031604882, 0.1519888, 0.09348508, 0.09187296), target2);
	target2 = MulAdd(h1, MF4x4(-0.21365458, -0.23696984, 0.13097638, -0.09435498, 0.16467983, -0.066370346, 0.1269104, -0.095128186, 0.09954892, 0.12489504, -0.43418056, 0.106512725, -0.17860703, -0.07114084, -0.07630834, -0.26642478), target2);
	target2 = MulAdd(i1, MF4x4(-0.009044342, 0.02711196, -0.14873673, 0.015405045, 0.0071443473, -0.025285944, 0.07409282, 0.06338527, 0.0149676185, 0.011741382, -0.2133069, -0.028912885, 0.19420496, 0.039629057, 0.057636812, 0.15214856), target2);
	target2 = MulAdd(a2, MF4x4(0.07629928, 0.25540486, -0.050925937, -0.18136702, 0.02261603, 0.22343902, 0.003270321, 0.10735731, -0.12541203, -0.10208828, 0.012832783, 0.2591262, 0.08122926, -0.009837677, 0.10308358, 0.19236866), target2);
	target2 = MulAdd(b2, MF4x4(0.0896358, 0.27571487, 0.04406029, -0.047453407, -0.08587119, 0.16366854, 0.20622262, 0.08347545, -0.3501584, -0.28434548, -0.07592983, 0.09098784, 0.07605388, 0.09677056, 0.0015295541, 0.05102585), target2);
	target2 = MulAdd(c2, MF4x4(0.18255898, 0.18618028, 0.0017002645, -0.013004655, -0.06436534, 0.13967068, 0.063077755, -0.10632303, -0.20803222, -0.028537111, -0.03144366, -0.08555215, 0.05154303, 0.02431626, 0.15246728, -0.013708507), target2);
	target2 = MulAdd(d2, MF4x4(-0.020998938, -0.05026291, 0.03700117, 0.00830308, -0.1949294, 0.0026698054, -0.034649856, 0.19784226, -0.083901435, -0.069783084, -0.1504053, 0.16595264, -0.07480141, 0.16067508, 0.06010996, -0.021359695), target2);
	target2 = MulAdd(e2, MF4x4(-0.040828142, -0.20158486, 0.034770954, -0.1894161, 0.11665004, 0.29729164, -0.10584386, 0.13165873, -0.18863006, -0.26719162, -0.047613148, -0.12728356, -0.2033613, 0.10550052, 0.20095508, -0.11275811), target2);
	target2 = MulAdd(f2, MF4x4(-0.0785033, -0.1896073, -0.051492307, -0.1694358, 0.1368308, 0.049355216, -0.05707422, 0.079159185, 0.024578957, -0.0923136, 0.089215435, 0.28670043, 0.027932687, 0.06510816, 0.10810999, 0.05990052), target2);
	target2 = MulAdd(g2, MF4x4(0.08135192, 0.0001326522, -0.16098668, -0.18663193, -0.10280192, 0.078255914, 0.047648013, 0.08326376, 0.055962667, 0.06302574, -0.080121025, -0.031820554, -0.019117938, 0.12515336, 0.09794088, -0.03276838), target2);
	target2 = MulAdd(h2, MF4x4(0.280923, 0.24079335, 0.007883573, 0.06270414, 0.3055441, 0.19291803, -0.16041607, 0.14836526, 0.0013885222, 0.04538063, 0.10742898, -0.064491205, 0.048174977, 4.237692e-05, -0.15194727, 0.024381457), target2);
	target2 = MulAdd(i2, MF4x4(-0.0009164131, -0.031949926, 0.0076425644, -0.036870714, -0.0031292974, 0.017726978, -0.20172147, -0.0770472, 0.26379177, 0.108997814, 0.08069395, 0.2126177, 0.012075376, -0.029457828, 0.062730506, -0.15754452), target2);
	target2 = MulAdd(na1, MF4x4(0.09167904, -0.2657421, -0.03443356, 0.03315832, -0.015365421, -0.1029612, -0.108251, 0.04261033, -0.097120754, -0.05616668, -0.09275983, 0.024902184, 0.050058514, -0.013761632, 0.07555132, -0.0046676896), target2);
	target2 = MulAdd(nb1, MF4x4(-0.10743835, -0.0007361781, -0.042085417, -0.08237517, -0.10094376, -0.24007876, 0.13924706, -0.07526801, 0.01158322, 0.15491122, 0.0069442675, -0.004242352, 0.11429785, 0.02994726, -0.11829945, -0.04108612), target2);
	target2 = MulAdd(nc1, MF4x4(0.073622055, -0.064717196, -0.0025231615, 0.13256475, 0.20159899, 0.047977835, -0.10289233, -0.18419135, -0.00888952, 0.059428576, -0.053062655, -0.02730631, 0.14545685, -0.08686949, 0.17454128, 0.035443828), target2);
	target2 = MulAdd(nd1, MF4x4(-0.010146019, 0.06712568, 0.12614638, 0.023590917, 0.025756737, 0.06603747, -0.17108095, -0.06179699, 0.027241204, -0.13196802, 0.043475866, -0.0397495, 0.05306092, 0.035672903, 0.047219284, -0.16680142), target2);
	target2 = MulAdd(ne1, MF4x4(0.079427816, -0.06716479, 0.19028603, -0.19694683, -0.061598092, -0.07471188, 0.21170339, 0.30140215, -0.0023369973, 0.04688297, -0.14154115, 0.19283508, 0.1339858, -0.09116279, 0.15305163, 0.029108394), target2);
	target2 = MulAdd(nf1, MF4x4(-0.14902157, -0.03339153, -0.08532003, -0.10736339, 0.08702709, 0.07607574, -0.09955836, -0.016585784, -0.030078214, -0.060374748, -0.2854279, 0.02441719, 0.034877967, 0.2099041, 0.11125731, -0.059071556), target2);
	target2 = MulAdd(ng1, MF4x4(-0.08436325, 0.06893047, -0.045362443, -0.02237741, -0.07583875, -0.034830183, -0.024008518, -0.2882329, -0.011109783, 0.101859994, 0.091137715, 0.0020565533, -0.044729806, -0.18168025, 0.069466636, 0.04994174), target2);
	target2 = MulAdd(nh1, MF4x4(0.11915174, 0.089596465, -0.18965814, 0.015218237, 0.13500094, 0.19921367, -0.008298205, 0.29650384, -0.049439427, -0.27590424, 0.36169067, -0.030582754, 0.02151196, 0.019915426, 0.04543398, 0.16126189), target2);
	target2 = MulAdd(ni1, MF4x4(0.1620274, -0.08264547, 0.082442135, -0.0034478644, 0.09888509, -0.0034957859, -0.107241705, -0.17729597, -0.05138647, 0.02052103, -0.019507123, 0.037574988, -0.1694345, 0.17871588, -0.22510391, 0.019049853), target2);
	target2 = MulAdd(na2, MF4x4(-0.10962245, -0.1329873, -0.060855392, 0.025941676, -0.19536193, -0.120365486, -0.04313703, -0.052912965, 0.20854498, 0.08341353, 0.008687068, -0.20432276, 0.15677948, -0.19000018, 0.01821201, -0.041512605), target2);
	target2 = MulAdd(nb2, MF4x4(0.012287526, -0.14180368, -0.098788455, 0.025949089, 0.09442778, 0.2247651, -0.12453263, 0.10435483, 0.274603, 0.06133054, 0.10506106, 0.14727746, -0.048299775, -0.082819685, 0.07319359, -0.047460355), target2);
	target2 = MulAdd(nc2, MF4x4(-0.070726536, -0.034744017, 0.07521428, 0.070649154, -0.05958955, -0.100232825, -0.010651838, 0.045392875, 0.2930271, -0.04952355, 0.3112155, 0.117203265, 0.025166962, 0.11176862, 0.06716659, 0.07175864), target2);
	target2 = MulAdd(nd2, MF4x4(-0.011560962, -0.14032063, -0.17424704, 0.07652749, -0.04220116, 0.052874275, -0.00225693, -0.031843517, -0.07520102, -0.13775803, 0.2449317, 0.069658786, 0.052280303, -0.105218224, 0.03574522, -0.020500354), target2);
	target2 = MulAdd(ne2, MF4x4(0.08793712, 0.26712346, 0.08315631, 0.23813692, -0.04439029, 0.031587064, 0.09561177, -0.13380238, -0.24982157, 0.31701845, -0.3875432, 0.10487225, 0.09201869, -0.037252493, -0.006935219, -0.14650282), target2);
	target2 = MulAdd(nf2, MF4x4(0.077635325, 0.13732299, -0.071563005, 0.096517466, -0.15051986, -0.111744404, 0.03996857, -0.052670125, -0.1819665, 0.054554947, -0.13774712, -0.20061246, -0.0023742192, 0.15647805, -0.024121126, 0.075497724), target2);
	target2 = MulAdd(ng2, MF4x4(0.0073632775, -0.06535298, 0.039895996, 0.20666869, 0.13625242, 0.04823007, -0.07135618, 0.04787906, 0.01383074, 0.15382123, -0.15519714, 0.056721795, 0.061946746, -0.0586851, 0.028934354, -0.02264129), target2);
	target2 = MulAdd(nh2, MF4x4(-0.19791882, -0.111910924, -0.010451344, -0.30566537, -0.1416239, -0.14523096, 0.116883226, -0.18241516, 0.2680614, -0.18487626, 0.17472346, 0.08346682, -0.14510359, -0.029229192, -0.005879142, 0.050247498), target2);
	target2 = MulAdd(ni2, MF4x4(0.030153519, -0.092469186, -0.022912916, 0.10200855, -0.04237032, -0.05917764, 0.10479645, -0.05619482, -0.18949397, -0.019547248, 0.013868889, -0.1524476, 0.14048979, -0.032521486, 0.1322921, 0.070972025), target2);

	MF3 target3 = tex5.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.07868885, -0.030913834, -0.009213676, 0.04870991, 0.021467991, 0.038739506, -0.042969644, -0.07122453, -0.08798675, -0.09784122, 0.021434791, 0.02510374), target3);
	target3 = MulAdd(e2, MF4x3(0.050420716, 0.0729716, 0.076532185, -0.019112485, -0.01037939, -0.026948035, -0.02591423, 0.008927897, -0.00042541025, 0.1043701, -0.0071186824, -0.041817162), target3);
	target3 = MulAdd(ne1, MF4x3(-0.16143242, -0.0009298223, -0.01228508, 0.07744052, -0.018313263, -0.0488145, 0.09241393, 0.07128674, 0.055164956, 0.054884013, -0.04834418, -0.06281626), target3);
	target3 = MulAdd(ne2, MF4x3(-0.049036566, -0.05979936, -0.05594288, -0.014564307, 0.031926468, 0.037857566, 0.015474487, -0.11385003, -0.11527764, -0.07076006, 0.057038613, 0.095983796), target3);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
	tex6[gxy] = MF4(target3, 1);
}


//!PASS 5
//!DESC Conv-4x3x3x16
//!IN tex3, tex4, tex6
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass5(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.034743138, 0.012946433, -0.082333155, 0.07721756 };
	target1 = MulAdd(a1, MF4x4(-0.06738501, 0.034009207, -0.21538448, 0.14296548, 0.12896985, -0.23526315, -0.08848608, 0.019602662, 0.14937137, 0.11353096, 0.11884168, -0.016765572, 0.030985225, 0.046430565, 0.06614828, -0.19202724), target1);
	target1 = MulAdd(b1, MF4x4(-0.10326068, 0.11014975, 0.17069744, -0.21474148, 0.16761585, 0.13434832, -0.101021074, 0.006307025, 0.07478008, -0.1060066, 0.035315692, 0.033488914, -0.24906659, 0.06269967, 0.11120735, -0.040928528), target1);
	target1 = MulAdd(c1, MF4x4(0.09334615, 0.057705753, 0.12213245, -0.06402275, 0.30694544, 0.034585163, 0.20345578, 0.07489286, 0.07483618, -0.14240396, 0.034846418, -0.03811241, 0.010882573, 0.13204294, 0.017563924, -0.047203008), target1);
	target1 = MulAdd(d1, MF4x4(-0.21673942, -0.024010994, -0.10238504, -0.041160326, 0.06838163, -0.20950818, 0.06526309, -0.079094924, 0.02208821, -0.28130978, 0.086275116, -0.089067616, 0.12133826, -0.062600106, -0.020521903, -0.07654401), target1);
	target1 = MulAdd(e1, MF4x4(-0.03055029, -0.15683146, -0.20331301, -0.06252028, 0.13350682, 0.20338707, 0.038425338, 0.1581342, -0.27322498, -0.14999662, -0.16681097, 0.0971585, -0.20014858, -0.081635274, -0.0781877, -0.20625232), target1);
	target1 = MulAdd(f1, MF4x4(0.38375977, -0.019825654, 0.1886721, 0.22616312, 0.3402173, 0.1825304, -0.05531195, 0.30973226, -0.2676023, 0.14413352, 0.021706983, 0.01732799, 0.23466855, -0.13805965, 0.22570935, 0.018103868), target1);
	target1 = MulAdd(g1, MF4x4(-0.15169825, 0.0270689, -0.2503316, 0.17289825, -0.16437647, 0.039233048, -0.35572487, -0.048393793, 0.19270042, 0.24260359, 0.12041881, -0.0009793913, 0.11656858, 0.11007414, -0.0757491, 0.047933612), target1);
	target1 = MulAdd(h1, MF4x4(-0.18657999, -0.11252566, -0.05237504, -0.07368097, 0.13882741, -0.13710637, -0.006996468, -0.062354874, 0.23452504, 0.15333645, -0.0022776406, -0.17910439, 0.03629509, -0.16264829, -0.010011833, -0.15313338), target1);
	target1 = MulAdd(i1, MF4x4(-0.060544558, -0.04913478, -0.061717357, 0.02323648, 0.28739056, -0.07434013, 0.19110644, 0.100050166, 0.0073363045, 0.08185653, -0.024797903, -0.14424153, -0.20838726, 0.16154376, -0.048517212, -0.025453888), target1);
	target1 = MulAdd(a2, MF4x4(0.14975396, -0.13142908, 0.36210674, -0.054021083, -0.10632155, 0.045697935, -0.18946633, 0.02228141, -0.08919603, 0.09800842, -0.17634438, 0.09512711, -0.03425503, -0.12298555, -0.05354435, -0.17112055), target1);
	target1 = MulAdd(b2, MF4x4(0.09958265, -0.057276618, -0.16262266, -0.06415915, 0.14579074, -0.36784375, 0.08034197, -0.04537706, 0.005460582, 0.22313322, 0.07382161, 0.014990379, 0.044636846, -0.2811128, -0.22621547, -0.06044004), target1);
	target1 = MulAdd(c2, MF4x4(0.10569276, -0.03738662, 0.16100396, 0.058593616, -0.048862137, -0.08796426, 0.20101094, -0.11039573, 0.17196764, -0.04601554, 0.008571281, -0.073729075, 0.051433694, -0.051276565, 0.087334655, -0.0360379), target1);
	target1 = MulAdd(d2, MF4x4(0.011119538, -0.28781965, 0.28637868, -0.1742508, -0.07121849, 0.10379717, 0.012615981, -0.029563965, -0.18678424, 0.05291095, 0.039143506, -0.028248642, -0.014103922, 0.029155696, 0.10433492, 0.16305852), target1);
	target1 = MulAdd(e2, MF4x4(-0.2231037, -0.13697462, -0.29124337, 0.08519773, 0.15893684, -0.17763218, 0.06950923, 0.34361118, -0.024844287, 0.044008408, -0.033844844, -0.086971916, -0.07884748, 0.2543499, 0.056884114, 0.10068364), target1);
	target1 = MulAdd(f2, MF4x4(-0.07710048, -0.23218372, 0.04346047, 0.21769643, 0.06473219, -0.18066105, -0.2511205, 0.15309611, 0.04535977, 0.16450433, 0.10846344, 0.0016952346, -0.010874939, 0.28966382, -0.121990964, 0.12956186), target1);
	target1 = MulAdd(g2, MF4x4(-0.007910202, 0.17766511, 0.14364475, 0.1016258, 0.0051045395, 0.18691733, 0.005813767, -0.0070582186, 0.019418601, -0.1604435, 0.016088275, -0.18265302, -0.15719391, -0.17369832, -0.036745597, -0.19647408), target1);
	target1 = MulAdd(h2, MF4x4(0.08938396, -0.0073808245, 0.11225727, -0.012303106, 0.096785046, 0.030483445, 0.027719889, -0.052584838, -0.14887555, -0.03422243, 0.12646855, -0.1722482, 0.010239037, 0.06406088, -0.20053658, 0.01964698), target1);
	target1 = MulAdd(i2, MF4x4(-0.120734036, -0.12450362, -0.06582111, 0.1639675, -0.19787048, -0.08049789, -0.014257596, 0.058436662, -0.0009387449, -0.08698089, -0.017400503, 0.06295286, 0.09890349, -0.057190523, -0.103520766, -0.04207548), target1);
	target1 = MulAdd(na1, MF4x4(-0.0118413875, -0.031288836, 0.09749554, -0.012266401, -0.07998591, 0.22615653, -0.06207416, 0.03257896, -0.076378696, -0.079426095, -0.13968349, -0.15423697, -0.1091681, -0.02893125, -0.032659534, -0.063735925), target1);
	target1 = MulAdd(nb1, MF4x4(0.119372696, 0.013176554, -0.029381052, 0.21919228, 0.045041792, 0.24844484, 0.26363325, 0.08480674, 0.087083444, 0.11984778, -0.088715754, 0.06421046, 0.05225977, -0.05140334, -0.055052705, -0.049854077), target1);
	target1 = MulAdd(nc1, MF4x4(0.0035781674, 0.0861361, -0.07675145, -0.056479637, 0.16973391, -0.12113791, 0.10729832, -0.03773517, 0.058618728, 0.12148276, 0.17260705, -0.06968724, 0.076358154, -0.15307103, 0.17700425, -0.13467014), target1);
	target1 = MulAdd(nd1, MF4x4(-0.02752418, -0.06366472, -0.025610954, 0.0013539721, -0.06465272, 0.0806373, -0.07336035, 0.10114861, 0.0041146413, 0.15878421, -0.044668555, -0.12150811, -0.1071482, -0.05086587, 0.18589285, 0.05065092), target1);
	target1 = MulAdd(ne1, MF4x4(0.07200056, 0.021739854, 0.29476613, -0.08475931, 0.15018553, -0.07886365, 0.36336347, -0.020576432, 0.25866082, -0.059272554, 0.054249667, -0.17822553, 0.1755872, 0.3244387, -0.39173844, 0.33894604), target1);
	target1 = MulAdd(nf1, MF4x4(-0.11570926, 0.1342677, -0.19511898, 0.0075454637, -0.01890476, -0.14239742, 0.18921931, 0.033990458, 0.31306365, -0.006998358, 0.029190077, -0.005679954, -0.15341778, 0.07766778, -0.25691047, -0.0964161), target1);
	target1 = MulAdd(ng1, MF4x4(0.019746238, 0.0021332854, -0.00879096, -0.1338671, -0.0001600663, -0.29465106, 0.0867611, -0.114963025, 0.07874301, -0.012734178, -0.11124061, -0.010926616, -0.04941506, -0.07516841, 0.116663, -0.29018974), target1);
	target1 = MulAdd(nh1, MF4x4(-0.01651721, 0.05955898, 0.023618208, 0.098695934, 0.018553663, -0.054378513, 0.1436929, 0.1693743, -0.27483663, 0.029127488, 0.09619316, -0.06109113, -0.08619361, 0.09315214, -0.02478657, 0.18544984), target1);
	target1 = MulAdd(ni1, MF4x4(0.09570196, -0.016528936, -0.1559397, 0.14312246, 0.04029428, 0.08773151, -0.043646842, 0.17894371, -0.082413055, 0.0027082344, -0.100171275, 0.01547501, 0.18122818, -0.11933676, 0.26404107, -0.3169703), target1);
	target1 = MulAdd(na2, MF4x4(-0.12073344, 0.08683522, -0.09249099, 0.058786053, -0.14480567, -0.121013954, 0.033335857, 0.009353379, -0.055087596, -0.13002734, 0.08890566, 0.05508963, -0.0075715426, -0.15936922, -0.03968994, -0.1690259), target1);
	target1 = MulAdd(nb2, MF4x4(0.2011206, 0.23898427, 0.23656492, 0.1287573, 0.14850396, 0.40532517, -0.107408255, 0.40119782, 0.099813245, -0.03830304, 0.101520434, -0.026478073, -0.048469637, 0.106440455, 0.056632314, -0.17825997), target1);
	target1 = MulAdd(nc2, MF4x4(-0.076735444, 0.05965795, -0.0052469415, -0.21785147, 0.11887833, 0.067560315, 0.051149055, 0.23626682, -0.1297049, -0.035512198, 0.20352256, -0.025064934, 0.04958706, 0.0454198, 0.0113334535, 0.0417486), target1);
	target1 = MulAdd(nd2, MF4x4(-0.09055751, 0.033915352, -0.21836667, 0.22006813, -0.099022895, 0.11720966, -0.15686816, -0.13586599, -0.094427735, -0.08831514, -0.06182928, 0.09213704, -0.03642064, 0.18129414, -0.012926811, 0.12179882), target1);
	target1 = MulAdd(ne2, MF4x4(0.19389409, 0.09512252, 0.14768016, -0.16623649, -0.031052284, -0.026814984, 0.106168024, -0.2026781, -0.04581419, -0.0016849053, -0.04101923, 0.038959503, -0.011938445, 0.20096186, -0.26666564, 0.4824324), target1);
	target1 = MulAdd(nf2, MF4x4(0.17727576, 0.07309147, 0.12131863, -0.163096, 0.17225246, 0.26256254, 0.27685758, 0.09094053, 0.029605515, -0.20217367, 0.047564875, 0.043115832, 0.15089568, -0.09670934, 0.24131384, 0.03337442), target1);
	target1 = MulAdd(ng2, MF4x4(-0.34192136, 0.12063195, -0.31159517, 0.04170889, -0.30147067, -0.21330686, -0.1514457, -0.121126845, 0.04409098, 9.2206596e-05, 0.027680017, 0.03230512, -0.27993527, -0.093485355, 0.07568645, -0.23585452), target1);
	target1 = MulAdd(nh2, MF4x4(0.0537712, -0.20847629, 0.1740093, -0.013894753, -0.32719997, -0.059484575, -0.006098233, -0.10336451, -0.14706188, -0.07424865, -0.07045905, 0.17093194, -0.22147557, 0.09086218, -0.11033544, -0.05306482), target1);
	target1 = MulAdd(ni2, MF4x4(0.00489003, -0.11509064, -0.021005848, 0.16637677, -0.089347586, 0.17545725, -0.17313693, 0.13742085, -0.14577347, 0.07951095, -0.092139855, 0.017118992, -0.053472433, 0.079414465, 0.0330263, -0.11189824), target1);
	
	MF4 target2 = { 0.08895955, -0.027667087, 0.20500831, 0.00037762933 };
	target2 = MulAdd(a1, MF4x4(-0.25835788, 0.050451655, -0.1845038, -0.07232528, 0.1323318, 0.26276684, 0.10842882, -0.083056524, 0.17426784, -0.3594826, 0.2728965, 0.08388844, -0.004007842, 0.020535901, -0.051425606, 0.07750436), target2);
	target2 = MulAdd(b1, MF4x4(-0.11410436, 0.014572361, -0.27057216, -0.023974562, 0.05234827, 0.15328228, -0.17502303, -0.3199359, 0.12188045, -0.095813684, 0.024145132, 0.0856916, -0.027453909, -0.043129764, 0.16971985, 0.021623038), target2);
	target2 = MulAdd(c1, MF4x4(0.06611095, 0.038625732, -0.13717118, -0.04497733, 0.15213469, 0.04770935, 0.0729271, -0.062052976, 0.004571303, 0.035141192, -0.059409596, 0.044652313, 0.17520894, 0.09665589, -0.1479193, 0.06528058), target2);
	target2 = MulAdd(d1, MF4x4(-0.1845968, 0.091479465, -0.09394898, -0.13545018, -0.029501775, -0.21426639, 0.09255898, 0.1257644, 0.20256902, 0.06267267, 0.10378081, 0.13494423, 0.058310498, 0.03642236, -0.16268995, -0.048100803), target2);
	target2 = MulAdd(e1, MF4x4(0.2155119, -0.3683131, 0.049449228, -0.20559964, -0.11761922, -0.2518804, -0.020712897, 0.12895772, -0.07543782, 0.5805017, -0.11301444, -0.038493153, -0.06710986, -0.09321189, 0.108671665, -0.03259695), target2);
	target2 = MulAdd(f1, MF4x4(0.035307787, 0.108389005, -0.27493554, 0.27029404, 0.25523573, -0.28636125, -0.20766719, -0.008661457, -0.004480811, -0.046390545, -0.16221444, 0.008979624, -0.061375532, 0.035076566, -0.018924266, 0.01380219), target2);
	target2 = MulAdd(g1, MF4x4(-0.051922515, -0.12463486, -0.10383422, 0.02220095, -0.1573033, 0.13980615, 0.13248625, -0.16803266, -0.0692132, -0.21552645, 0.13744529, 0.23034313, 0.0052666534, 0.028977966, 0.07720251, -0.06477756), target2);
	target2 = MulAdd(h1, MF4x4(-0.14097473, 0.2770271, -0.172289, -0.03000696, -0.028684044, 0.040578447, -0.2290285, 0.082329154, -0.042402364, -0.20926563, 0.08233207, 0.11862443, -0.07038536, -0.02273004, 0.091550544, -0.065856494), target2);
	target2 = MulAdd(i1, MF4x4(0.14879914, -0.023923844, -0.23569296, 0.20306346, 0.17502785, 0.28776234, -0.2788995, 0.10012439, -0.05635638, -0.025840463, 0.09222198, 0.118032, 0.08057015, 0.1286071, 0.060189806, -0.052669708), target2);
	target2 = MulAdd(a2, MF4x4(0.07076086, -0.15111323, -0.07427972, 0.008372168, -0.17791592, -0.16254742, 0.013961132, -0.0944912, -0.23380096, 0.17377278, -0.09683394, 0.019931393, -0.12042098, 0.0016406325, 0.09393333, -0.06882231), target2);
	target2 = MulAdd(b2, MF4x4(0.21465093, 0.04142968, 0.06840044, -0.37831602, -0.05549571, 0.044905066, -0.07873589, -0.026804, -0.34764197, 0.022487951, -0.077293746, 0.089457795, -0.110094436, 0.24233972, 0.06285107, -0.10851744), target2);
	target2 = MulAdd(c2, MF4x4(0.093270175, 0.084138945, 0.03938272, 0.063565865, -0.010733802, 0.13554469, -0.06650261, 0.033002816, 0.011187271, -0.12821455, 0.20785914, -0.030438649, -0.124710515, -0.022294303, 0.09732408, 0.057609864), target2);
	target2 = MulAdd(d2, MF4x4(-0.12833868, 0.021577539, -0.02700365, 0.11799592, -0.03655647, -0.04225167, 0.11049353, -0.16036157, 0.049277548, -0.033842396, 0.10020137, 0.095509745, 0.08060231, -0.09237418, -0.035598125, -0.035926737), target2);
	target2 = MulAdd(e2, MF4x4(-0.32829186, 0.3492363, 0.030671779, -0.12606762, 0.010437313, 0.2757115, -0.21517593, -0.15800527, -0.12592544, -0.20578934, 0.10444053, 0.12993255, -0.046079267, 0.03834173, -0.19277227, -0.22124454), target2);
	target2 = MulAdd(f2, MF4x4(-0.052546192, 0.026082167, 0.13831234, 0.10982424, 0.012946818, -0.12439852, 0.10134106, -0.10050398, -0.04472338, -0.14325236, -0.20579574, 0.0044005127, 0.22013672, -0.32955512, 0.12404084, -0.008160738), target2);
	target2 = MulAdd(g2, MF4x4(-0.10774314, -0.31650826, -0.06601711, 0.19635755, -0.12622592, -0.06396423, 0.13856032, 0.16540553, 0.021387719, 0.23377723, -0.053738154, -0.1000186, -0.08338395, -0.052813534, 0.008122962, 0.13732094), target2);
	target2 = MulAdd(h2, MF4x4(-0.18270823, 0.06966014, -0.17788303, -0.27303055, -0.077971615, 0.013978423, -0.02039098, 0.12715338, -0.11924171, 0.18900296, -0.085199654, 0.215198, 0.18587974, -0.009749325, 0.0173584, -0.12018259), target2);
	target2 = MulAdd(i2, MF4x4(0.052129295, -0.107416354, 0.12711766, 0.03708665, -0.14369462, -0.055359814, -0.16639823, -0.045143317, -0.06925672, -0.040696755, 0.01999809, -0.016040625, -0.02484878, 0.07417094, 0.050875198, 0.2145528), target2);
	target2 = MulAdd(na1, MF4x4(0.055696912, -0.16680926, -0.021987487, 0.024941636, -0.0927883, 0.022136632, 0.033782948, -0.10646058, -0.14944647, 0.25457275, 0.046682496, -0.022462368, -0.07886781, 0.08165927, 0.06848105, 0.0063734027), target2);
	target2 = MulAdd(nb1, MF4x4(0.037053242, 0.033215813, 0.18291366, 0.12340375, 0.08491059, -0.28442004, -0.0127422465, -0.039834313, -0.23321372, 0.26676926, -0.05636355, -0.15672484, -0.12891728, -0.15486577, -0.032004442, -0.092745155), target2);
	target2 = MulAdd(nc1, MF4x4(0.015779478, -0.18457565, 0.24996394, 0.036197674, 0.15694007, 0.15863103, -0.07332398, 0.0016235278, -0.15536517, -0.056062788, 0.14102836, 0.16915025, -0.08001087, 0.07073164, 0.13796777, 0.123867124), target2);
	target2 = MulAdd(nd1, MF4x4(0.045792986, -0.15135059, -0.1354885, -0.043678258, -0.35655212, 0.51232076, -0.12816145, -0.046569496, -0.014127674, -0.06282611, -0.098873, -0.06359104, -0.0919222, 0.11822437, 0.079254694, 0.00579688), target2);
	target2 = MulAdd(ne1, MF4x4(-0.15683417, 0.61610246, -0.3024612, 0.12917964, -0.09303367, 0.23612969, -0.40842506, -0.12374661, -0.07572449, -0.2613284, -0.09970177, -0.015227848, 0.106239066, -0.21411185, 0.051998455, -0.1364518), target2);
	target2 = MulAdd(nf1, MF4x4(0.23850034, -0.14394449, -0.0031468747, -0.2380617, -0.027200876, -0.041352056, -0.01864445, 0.033848196, -0.12064239, -0.110480845, 0.08450956, -0.22328654, 0.17664163, 0.22268307, 0.050886698, -0.17475672), target2);
	target2 = MulAdd(ng1, MF4x4(-0.17808256, 0.010803805, 0.03315186, 0.033143792, -0.14205995, 0.25039625, -0.08784382, -0.13454252, 0.19576813, 0.10755282, 0.22821628, 0.019456752, -0.0422955, -0.016182603, -0.12066697, 0.0548465), target2);
	target2 = MulAdd(nh1, MF4x4(0.11563777, -0.257929, 0.0010403778, 0.080267854, -0.0025255163, 0.2855168, -0.060352214, -0.07816255, -0.00090574916, 0.049510725, 0.03720483, 0.059250016, -0.08674136, 0.20522198, -0.28694284, 0.1299507), target2);
	target2 = MulAdd(ni1, MF4x4(-0.14638457, 0.04063328, 0.03139636, -0.007934521, 0.07689684, -0.09467145, 0.10607347, 0.054510128, 0.003306194, 0.05347124, 0.062762424, -0.041480847, -0.07677865, -0.139573, 0.010972524, 0.21957156), target2);
	target2 = MulAdd(na2, MF4x4(-0.026845628, -0.043439507, 0.034738723, 0.07281683, 0.14474197, 0.031586993, -0.22767854, -0.0707655, 0.105201736, -0.28805482, 0.008668302, -0.16329518, 0.06157049, 0.3803886, 0.26345953, -0.011096537), target2);
	target2 = MulAdd(nb2, MF4x4(-0.23328833, 0.085731484, -0.07755016, 0.33559516, 0.07704345, 0.115106605, -0.24114038, -0.44630137, 0.2726737, -0.32170138, -0.009236524, -0.11666051, 0.0457048, 0.07876708, 0.13134004, -0.035318643), target2);
	target2 = MulAdd(nc2, MF4x4(-0.05140272, 0.011605703, 0.13899171, -0.05071015, 0.18413687, -0.31413674, -0.13043414, -0.15118152, -0.15326938, -0.10720126, -0.23738635, 0.13481396, 0.25115076, -0.009316611, -0.2584441, -0.14389823), target2);
	target2 = MulAdd(nd2, MF4x4(-0.039723795, -0.14869407, -0.1692942, 0.026501274, -0.10685166, -0.121267825, -0.08584318, -0.09580693, -0.10626739, -0.068417974, 0.11321909, -0.13664317, 0.061380867, -0.2587898, 0.14850819, 0.008178645), target2);
	target2 = MulAdd(ne2, MF4x4(0.06912782, 0.24230564, -0.048150286, 0.2203717, -0.17417085, 0.105546735, -0.16648416, -0.0045053074, 0.09764028, 0.37122592, -0.1939995, -0.27899942, -0.088152565, -0.53869057, 0.21676709, -0.08056594), target2);
	target2 = MulAdd(nf2, MF4x4(0.07651754, 0.03704878, -0.0197015, 0.1660726, 0.07002748, -0.11820414, -0.23360898, 0.1481592, 0.029847002, 0.054057185, 0.013176299, 0.06552942, -0.13865773, -0.20105527, -0.37550658, 0.005769631), target2);
	target2 = MulAdd(ng2, MF4x4(-0.22697811, -0.17426412, 0.10148018, 0.008134666, 0.10771455, 0.16943407, -0.016319012, -0.40176705, -0.06854668, -0.049045276, 0.20919096, 0.13240765, -0.050125647, 0.14902508, 0.052697595, -0.13817468), target2);
	target2 = MulAdd(nh2, MF4x4(0.04301619, 0.23184754, -0.023551717, 0.3768405, 0.028999053, 0.06709736, -0.05993663, -0.059861984, 0.15499207, -0.22217415, 0.111131504, -0.09082529, -0.19389243, 0.024621522, -0.15305442, 0.010799284), target2);
	target2 = MulAdd(ni2, MF4x4(-0.035496738, 0.010802548, -0.028718363, 0.19263634, 0.16900502, -0.16661702, -0.027631328, 0.18309957, -0.015860107, -0.03309961, -0.091390446, 0.14000848, -0.0036591904, 0.47659522, -0.09373507, -0.29020965), target2);

	MF3 target3 = tex6.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.03094887, -0.008734403, 0.00042712069, 0.053891554, 0.05837673, 0.06200635, 0.09071558, -0.04202184, -0.046172567, -0.0425916, 0.04905093, 0.020835675), target3);
	target3 = MulAdd(e2, MF4x3(0.096628904, -0.037792254, -0.043241944, -0.011923947, -0.025950424, -0.031381752, -0.060941868, -0.07859433, -0.07535451, -0.026777223, 0.08604982, 0.07829908), target3);
	target3 = MulAdd(ne1, MF4x3(-0.06435972, 0.0036599538, 0.00786578, -0.061972067, -0.05681472, -0.06667608, -0.106890626, 0.007406496, 0.029977169, -0.20519382, -0.044860814, 0.0021225857), target3);
	target3 = MulAdd(ne2, MF4x3(-0.16876474, 0.012789643, 0.026692612, 0.017817136, 0.026935097, 0.02227043, 0.01690181, 0.07716103, 0.086527, 0.07923805, -0.10443151, -0.10859543), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}


//!PASS 6
//!DESC Conv-4x3x3x16
//!IN tex1, tex2, tex5
//!OUT tex3, tex4, tex6
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass6(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.043347504, -0.20504741, -0.037821215, -0.014486937 };
	target1 = MulAdd(a1, MF4x4(0.018134737, -0.2296755, -0.07276725, -0.029795367, 0.05382051, 0.092847414, -0.024469728, -0.1674685, 0.0017946451, 0.30074653, 0.0034195695, -0.04892261, 0.18229689, -0.20116119, -0.12702174, -0.08259108), target1);
	target1 = MulAdd(b1, MF4x4(-0.1357695, -0.08149211, 0.09314453, -0.21966846, 0.34740716, 0.043606415, 0.04225903, 0.034449834, 0.17248215, 0.39148283, -0.13868807, -0.010550686, 0.044238456, -0.09693464, -0.005044985, 0.24383289), target1);
	target1 = MulAdd(c1, MF4x4(0.19959371, 0.098685324, 0.058746945, 0.010580748, 0.08051514, 0.031898864, 0.017556064, 0.13004355, -0.01727653, 0.11044019, 0.040673427, -0.20064595, -0.23321067, 0.06398686, -0.19126236, -0.2430858), target1);
	target1 = MulAdd(d1, MF4x4(-0.12870286, -0.113455534, 0.23722827, 0.070718594, 0.19049989, -0.1927299, -0.06343845, 0.113127775, 0.082530305, -0.10972526, -0.090779535, 0.05731582, 0.11018802, -0.18049154, 0.09269507, -0.10304576), target1);
	target1 = MulAdd(e1, MF4x4(0.15513484, 0.06659583, 0.08125296, -0.012350324, -0.09492788, 0.5048303, 0.13206847, 0.39554298, 0.28953737, -0.20913891, -0.26781562, -0.17539899, 0.023778774, 0.29716817, 0.15768486, 0.37702608), target1);
	target1 = MulAdd(f1, MF4x4(0.0724462, 0.015571356, -0.032217246, 0.0050658924, -0.22708446, 0.03968809, 0.016753826, 0.0025668752, -0.055932112, 0.113931604, 0.19766758, -0.030027265, -0.17384295, 0.15013468, -0.0070017707, -0.09469028), target1);
	target1 = MulAdd(g1, MF4x4(-0.078361556, -0.0954201, -0.006358101, 0.040500037, 0.4190454, -0.17622913, -0.07234791, 0.05462559, 0.18641087, 0.058313597, -0.0180785, 0.13818781, -0.14640772, 0.0699486, 0.0073663946, -0.076789856), target1);
	target1 = MulAdd(h1, MF4x4(-0.21421191, 0.08736062, 0.09041226, 0.03608585, 0.02769972, 0.09641289, 0.11824623, 0.05653645, 0.16464607, 0.19839554, -0.13379547, 0.054417104, 0.067530684, 0.18971571, 0.13785432, -0.097639814), target1);
	target1 = MulAdd(i1, MF4x4(-0.32658005, -0.14606023, -0.069448665, 0.032998275, -0.28331423, 0.0011900732, -0.020304207, -0.13535896, 0.08298347, 0.045509677, -0.030503955, -0.037504148, 0.049955815, 0.0925771, 0.00058534974, -0.12398032), target1);
	target1 = MulAdd(a2, MF4x4(-0.2955836, 0.29059318, -0.018196672, -0.35866606, -0.01309431, 0.03540315, 0.010609202, 0.11956812, 0.10296229, 0.22536302, 0.015201129, -0.23797737, -0.16960852, -0.11414787, -0.034440614, 0.112644605), target1);
	target1 = MulAdd(b2, MF4x4(-0.14952518, 0.07024436, -0.083184876, -0.0814617, -0.13303639, 0.016159372, -0.13521518, 0.2221334, -0.056617837, 0.12958299, 0.064461656, -0.20146395, -0.16023181, 0.2640758, 0.27528805, -0.1426518), target1);
	target1 = MulAdd(c2, MF4x4(-0.04382363, 0.09856003, -0.08561442, -0.15699928, -0.121069774, 0.04685383, -0.009170197, -0.031489655, 0.18730178, 0.238442, 0.22497098, 0.032015145, -0.03709115, 0.1535079, 0.21674158, 0.10678019), target1);
	target1 = MulAdd(d2, MF4x4(-0.12200952, 0.24224263, 0.034097504, -0.028179523, -0.011962496, -0.04489487, -0.05198827, 0.22194928, -0.045400873, -0.049828544, 0.111477956, -0.098361604, 0.12788995, -0.016093334, -0.19886433, -0.011161484), target1);
	target1 = MulAdd(e2, MF4x4(0.30563712, 0.013071727, -0.004799883, 0.12888052, -0.259498, -0.041566677, 0.07311124, 0.162324, 0.28371668, -0.004693743, -0.0019395344, 0.029358242, 0.08730285, 0.12184509, 0.05508437, 0.048439097), target1);
	target1 = MulAdd(f2, MF4x4(0.12760857, 0.115813166, -0.217695, -0.10629871, -0.227366, 0.09030426, -0.15313712, 0.020528946, -0.20743734, 0.088583544, 0.04594053, -0.22891994, 0.18949282, -0.042186577, -0.17330512, -0.010711361), target1);
	target1 = MulAdd(g2, MF4x4(0.029503195, 0.0063797613, -0.17004286, -0.096844055, 0.010218098, 0.04247233, 0.02362808, 0.14700809, -0.08082364, 0.11159672, -0.018505255, -0.15228583, 0.15693732, -0.025359154, 0.024829186, 0.1943192), target1);
	target1 = MulAdd(h2, MF4x4(-0.03912932, -0.21989027, 0.12203028, 0.18702275, -0.118537985, 0.21039696, 0.09102061, 0.012288879, 0.031666897, 0.1318455, -0.04901404, -0.07516063, -0.44782668, 0.04884501, 0.047070876, 0.008728358), target1);
	target1 = MulAdd(i2, MF4x4(-0.08669101, 0.3053463, -0.08963947, 0.0034188698, -0.070004664, 0.064788476, 0.093737036, 0.070050925, 0.12728429, -0.13179256, -0.014913502, 0.09308136, -0.027638942, 0.008638711, 0.08794172, -0.05531093), target1);
	target1 = MulAdd(na1, MF4x4(0.0728421, 0.07872358, 0.11454748, 0.08497922, 0.071820416, -0.11789207, -0.08184197, 0.1359588, -0.2143346, -0.05876081, 0.023172129, -0.08430511, -0.19276723, 0.14283359, 0.15604696, -0.055187486), target1);
	target1 = MulAdd(nb1, MF4x4(0.068641685, 0.2732106, -0.2809107, 0.12736696, -0.08642367, 0.023898933, -0.17859498, -0.18299665, -0.06684587, -0.12204666, 0.45898953, -0.24240111, 0.25182098, -0.04395751, 0.10637211, -0.22135144), target1);
	target1 = MulAdd(nc1, MF4x4(0.0852072, 0.051133018, 0.03333165, -0.0008938216, 0.10251267, 0.0550774, 0.041769378, -0.21259712, 0.286912, 0.123342015, 0.282759, -0.0730124, 0.14275575, -0.15580742, -0.15224406, 0.045376908), target1);
	target1 = MulAdd(nd1, MF4x4(0.03328225, 0.11563978, -0.07451964, 0.030546209, -0.04698351, -0.18544962, 0.037350416, 0.13969816, 0.0556746, -0.06359919, 0.06478219, -0.031694926, 0.13396506, 0.09443612, -0.01922686, -0.06290365), target1);
	target1 = MulAdd(ne1, MF4x4(0.07495407, 0.063429266, -0.106221214, -0.085107304, 0.2497817, -0.46598253, -0.18833177, -0.2731128, -0.13024822, 0.56053543, 0.055704467, -0.12331414, -0.031199086, 0.05061188, 0.22097112, -0.6611177), target1);
	target1 = MulAdd(nf1, MF4x4(0.08276988, -0.044184342, -0.03562185, -0.06159881, 0.27694225, -0.07192965, -0.08663714, 0.020221777, 0.14095962, -0.06229397, 0.051374253, -0.038158998, 0.10664802, -0.041305423, 0.051260717, -0.054698635), target1);
	target1 = MulAdd(ng1, MF4x4(0.12800686, 0.03485072, 0.039914366, 0.034041498, -0.08305794, -0.046292894, 0.22765331, 0.10904922, 0.0013937047, -0.08750301, 0.009126207, -0.065589435, 0.2837707, 0.08884436, -0.07234862, -0.093502745), target1);
	target1 = MulAdd(nh1, MF4x4(0.113439895, 0.06081726, 0.1122302, -0.022936966, 0.10329637, -0.31816107, -0.051597945, 0.23846027, -0.083913095, -0.29872265, -0.040147282, -0.08981918, -0.04329814, -0.12339693, -0.034489952, 0.013393211), target1);
	target1 = MulAdd(ni1, MF4x4(0.33091688, 0.1726297, 0.034332044, -0.091396205, 0.15434311, -0.0022870845, -0.15506189, 0.08710491, -0.16063525, 0.042252056, 0.017086457, 0.08134797, 0.08631321, 0.037843138, 0.088296555, 0.0064518084), target1);
	target1 = MulAdd(na2, MF4x4(0.09161051, 0.114355795, -0.15304486, -0.030537153, 0.1835368, -0.3287635, 0.031197926, 0.09717476, 0.04276852, 0.113250345, 0.05949038, -0.10599563, 0.43574792, -0.060788117, 0.18409383, 0.12678055), target1);
	target1 = MulAdd(nb2, MF4x4(-0.018356865, -0.0072578182, 0.12020777, -0.013127592, 0.20136636, -0.22984362, 0.06896224, 0.00044982752, 0.008428429, -0.123316936, -0.09989286, 0.078248784, -0.16313677, -0.003020313, -0.46285018, -0.08967125), target1);
	target1 = MulAdd(nc2, MF4x4(-0.03451497, -0.10864502, 0.13207638, 0.17194521, 0.0037514758, -0.20222199, -0.12535086, 0.001511977, 0.056294486, -0.2112898, 0.078261316, 0.10118746, -0.044742294, 0.21793383, -0.19927903, -0.21338293), target1);
	target1 = MulAdd(nd2, MF4x4(-0.034903776, -0.10167085, 0.031066334, 0.0379958, 0.20532596, -0.17457838, 0.16556816, -0.0021619152, 0.02682665, 0.03396325, -0.059273884, 0.1922813, -0.072151475, -0.010240544, 0.2302027, 0.12385962), target1);
	target1 = MulAdd(ne2, MF4x4(-0.20170145, -0.08203941, -0.028107846, -0.18003726, 0.44744352, -0.13190243, 0.13233365, 0.03626546, 0.085763134, -0.25613126, -0.11213388, 0.15529087, -0.271649, 0.050587676, -0.062583975, 0.057289865), target1);
	target1 = MulAdd(nf2, MF4x4(-0.040649455, -0.17949733, 0.35847965, -0.040587306, 0.24314344, -0.23811667, 0.13958354, 0.04961874, 0.09858903, -0.04202913, -0.21850993, 0.0700419, -0.09130745, -0.096835814, 0.0022782686, -0.25416258), target1);
	target1 = MulAdd(ng2, MF4x4(-0.08215545, -0.019647893, 0.055263475, 0.053733055, 0.098485716, -0.1041945, -0.06541415, -0.08868577, -0.07262986, 0.03513784, -0.110529095, -0.03369232, 0.056786604, 0.2569229, -0.05931065, -0.22081214), target1);
	target1 = MulAdd(nh2, MF4x4(0.066926084, 0.029664058, -0.10779271, 0.11026963, 0.23927264, -0.16914488, 0.022947345, 0.12303853, -0.07066212, -0.013205378, 0.15348643, 0.035568032, 0.20966691, 0.010149819, -0.08814468, -0.064854674), target1);
	target1 = MulAdd(ni2, MF4x4(0.11493852, -0.074924305, -0.14840698, -0.16956823, 0.056806292, -0.06387947, -0.06880271, -0.04637334, -0.1929893, 0.18226422, 0.064644486, -0.1594863, 0.027403917, 0.13951495, -0.06569123, -0.07700207), target1);

	MF4 target2 = { -0.011865144, 0.11717201, -0.13823777, -0.059450272 };
	target2 = MulAdd(a1, MF4x4(0.047881734, -0.09396414, -0.2839081, 0.3140853, 0.052613556, 0.09940423, 0.23960467, -0.022228222, -0.12065009, 0.07898222, 0.08657881, 0.010852739, -0.050450284, 0.01683982, 0.031813968, 0.053060856), target2);
	target2 = MulAdd(b1, MF4x4(-0.10252411, -0.03116448, -0.30114275, -0.0316799, -0.017501019, -0.03006003, -0.2095696, 0.10134927, -0.3901916, -0.15335023, -0.11955071, 0.1337449, 0.101239376, -0.25044814, 0.2128469, 0.018979514), target2);
	target2 = MulAdd(c1, MF4x4(-0.13392173, 0.052036732, 0.1682114, -0.026263753, 0.027221246, -0.15121374, 0.13723798, 0.08950682, -0.1182108, -0.07294226, 0.023392374, 0.052329235, -0.05632852, -0.07036173, 0.06872573, 0.05238042), target2);
	target2 = MulAdd(d1, MF4x4(0.18112028, 0.18242362, -0.06812871, 0.032463413, 0.124638766, -0.26765212, -0.07678663, 0.33806562, 0.09674393, 0.15574542, 0.23634006, -0.02873782, -0.1626769, -0.14760062, -0.007274849, 0.09866139), target2);
	target2 = MulAdd(e1, MF4x4(-0.10726673, -0.10925056, 0.19967109, -0.19936769, 0.15942842, -0.14870064, 0.15493345, -0.08489036, -0.49053356, -0.17321263, 0.28426084, 0.18721215, -0.09898434, -0.2751838, -0.11833524, 0.028445128), target2);
	target2 = MulAdd(f1, MF4x4(-0.11788817, -0.23724948, -0.046072144, 0.035621114, 0.04527003, -0.0073492974, 0.11097195, 0.06806836, 0.04814677, -0.1408476, -0.1325629, 0.00929532, -0.16699041, -0.03034791, 0.08320368, -0.15429299), target2);
	target2 = MulAdd(g1, MF4x4(0.2729515, 0.008244692, -0.17441982, -0.39026466, 0.17381759, 0.31194404, 0.055934936, 0.20744409, 0.20119062, 0.0734271, 0.0796807, 0.0031037466, -0.0016392237, 0.033733975, 0.07149338, 0.042083208), target2);
	target2 = MulAdd(h1, MF4x4(0.07985744, 0.10945015, 0.018472541, 0.1397503, 0.2005682, 0.42641, 0.23022486, -0.2916921, 0.028285174, -0.31885162, -0.27070364, -0.10390779, 0.0751492, 0.12752363, -0.2279459, 0.08998453), target2);
	target2 = MulAdd(i1, MF4x4(0.18450491, -0.140783, -0.008006845, 0.09029298, 0.12536179, 0.26949662, 0.09491545, 0.063907005, 0.11212244, 0.09778506, -0.1835966, -0.053119674, 0.0072294096, 0.25018227, 0.010868525, -0.22721334), target2);
	target2 = MulAdd(a2, MF4x4(-0.028011927, -0.20073172, 0.5976166, -0.19494139, 0.17958745, -0.03838646, 0.058325976, -0.29409218, -0.12793432, 0.03245129, 0.35662368, -0.05048354, -0.13368197, -0.06151968, -0.012714591, -0.1763054), target2);
	target2 = MulAdd(b2, MF4x4(0.18468465, 0.31682113, 0.12818255, -0.117110476, 0.13709468, -0.10034022, -0.07994527, -0.1259309, 0.04067299, -0.1147398, 0.28361055, 0.27916273, 0.03696692, 0.16829546, 0.27819383, 0.08305029), target2);
	target2 = MulAdd(c2, MF4x4(-0.28920117, -0.033877946, 0.01586206, 0.04681198, 0.024248574, -0.045777842, -0.03342128, 0.07525412, -0.063377544, -0.016737273, 0.11235511, -0.04325238, -0.24170023, -0.09993599, -0.03205371, 0.14339828), target2);
	target2 = MulAdd(d2, MF4x4(-0.008357902, -0.11038377, 0.03709221, 0.26775306, 0.07963845, -0.25377446, -0.17630441, -0.10966474, 0.057311732, -0.083327, 0.044497233, 0.06903858, -0.26531395, -0.103399664, -0.14806591, 0.269314), target2);
	target2 = MulAdd(e2, MF4x4(0.05450808, -0.041993964, -0.07217651, 0.034468375, 0.2117634, 0.0075620585, 0.05825411, -0.2252478, -0.0527787, 0.049732126, -0.032040413, -0.09361454, 0.29585132, 0.018413153, 0.18384546, -0.024226356), target2);
	target2 = MulAdd(f2, MF4x4(-0.031109914, 0.19351351, 0.07405522, -0.06313074, -0.09983541, -0.011495182, 0.11749038, -0.16775608, 0.2790974, -0.09338754, 0.07913264, 0.103792936, -0.18679164, -0.15639925, 0.112943865, 0.07930375), target2);
	target2 = MulAdd(g2, MF4x4(0.004106195, -0.036833283, 0.12908752, 0.12869535, -0.02472107, 0.17561707, -0.025890926, -0.18789047, 0.096218705, -0.16306408, -0.02198454, -0.010134957, -0.09710009, 0.002062143, -0.046785697, 0.0029441968), target2);
	target2 = MulAdd(h2, MF4x4(0.19648251, -0.015663045, -0.0730215, 0.028611008, 0.13529862, -0.015256192, -0.04119306, -0.24628192, 0.02601027, -0.21184283, -0.1962902, 0.09109358, -0.06792383, 0.092336476, 0.12215351, -0.08596062), target2);
	target2 = MulAdd(i2, MF4x4(-0.17530201, -0.0351919, -0.31872514, -0.13933206, -0.07000922, -0.049807087, 0.0010997375, -0.033573963, 0.07442056, -0.33290103, -0.40381998, 0.09435, -0.3280128, -0.09953127, -0.11283648, 0.20685865), target2);
	target2 = MulAdd(na1, MF4x4(-0.052573867, -0.035328753, -0.11132943, -0.17515652, 0.05021051, 0.058642425, -0.046640664, 0.0799107, -0.027398815, -0.33619994, -0.22135767, 0.07894002, -0.14941697, -0.0940996, -0.11655085, 0.049795926), target2);
	target2 = MulAdd(nb1, MF4x4(-0.039301276, 0.041062318, 0.20312686, -0.009338705, 0.013706282, -0.0245852, 0.03458311, 0.09601228, -0.18203016, -0.012260314, 0.17984508, -0.056576703, -0.102844186, 0.24047872, 0.05307189, 0.16066082), target2);
	target2 = MulAdd(nc1, MF4x4(0.1478775, 0.0046362123, 0.05459521, 0.07162838, -0.01896149, 0.23700175, -0.14174299, 0.06988599, -0.32545477, -0.08065096, -0.061227743, -0.0010796773, 0.094327345, -0.20760082, -0.19523263, 0.19859222), target2);
	target2 = MulAdd(nd1, MF4x4(-0.049676366, -0.10381536, 0.02546116, -0.13127093, 0.10954914, 0.0048147943, 0.06962328, -0.30456528, -0.11956627, 0.0150488885, -0.10711722, 0.1684613, -0.1939089, -0.10577047, -0.11980919, -0.036988296), target2);
	target2 = MulAdd(ne1, MF4x4(-0.054795764, 0.09491116, -0.08494948, 0.059765853, 0.0131597435, 0.20786162, 0.11999637, 0.024381055, 0.22830428, 0.027053319, -0.011646274, -0.12145409, -0.07899559, -0.012688263, 0.10684157, 0.3824219), target2);
	target2 = MulAdd(nf1, MF4x4(-0.23994572, -0.0031532666, -0.0050638164, 0.14236279, 0.05690383, -0.06259682, 0.052624144, 0.20461404, -0.19230312, -0.11072268, 0.013023965, 0.08931543, -0.21997221, 0.11760443, -0.40943825, 0.28656834), target2);
	target2 = MulAdd(ng1, MF4x4(-0.06606179, 0.26007771, 0.033754125, 0.119690455, 0.024669139, -0.06752839, 0.12688096, -0.0063201943, -0.17123021, 0.07548857, -0.14213699, 0.034093797, -0.15632647, -0.123243414, -0.42634043, 0.1715022), target2);
	target2 = MulAdd(nh1, MF4x4(-0.046503466, 0.13876389, 0.17973013, -0.25938338, -0.18824704, -0.11876702, 0.31065792, -0.041042212, -0.061369427, 0.2057992, 0.17295738, 0.3836555, -0.21109799, -0.10167118, 0.16577047, 0.113483034), target2);
	target2 = MulAdd(ni1, MF4x4(-0.24534856, -0.014482421, 0.22515748, -0.12773542, 0.12794174, -0.02528619, 0.41710484, 0.09154934, -0.17805946, -0.25428918, 0.07294183, 0.047079418, -0.30949152, -0.08919157, 0.17888431, 0.17706038), target2);
	target2 = MulAdd(na2, MF4x4(-0.1741826, 0.046225294, -0.10761791, 0.2619953, 0.007373745, 0.05104337, -0.22309966, 0.34529984, -0.034363825, -0.022187237, -0.08609555, 0.16842419, 0.28136057, 0.17843607, -0.11307746, -0.05668021), target2);
	target2 = MulAdd(nb2, MF4x4(-0.12310616, -0.29661375, -0.10581025, -0.049584012, 0.19651765, 0.08436489, -0.14533581, -0.029874112, -0.15422897, -0.062741704, -0.22694711, -0.15547274, -0.15181333, 0.0286061, 0.022438493, -0.062447168), target2);
	target2 = MulAdd(nc2, MF4x4(0.3497046, -0.09455009, 0.060618952, -0.2134236, 0.054515295, 0.07451165, -0.09267233, -0.010513333, 0.13842636, 0.11563433, -0.054750167, 0.050432, 0.1514256, 0.04284002, -0.2095581, 0.07907657), target2);
	target2 = MulAdd(nd2, MF4x4(-0.11745651, -0.04717057, 0.085377194, -0.065956995, 0.07280491, 0.2730059, 0.11088276, 0.2437957, 0.14018989, 0.1164107, -0.09516929, 0.0022427947, 0.111544006, -0.0680495, 0.09324579, -0.12482022), target2);
	target2 = MulAdd(ne2, MF4x4(-0.07995795, -0.03387884, 0.019846136, 0.10231208, -0.07017192, 0.18659039, 0.035161644, 0.101182766, -0.14901665, 0.21307294, 0.063894205, -0.27546507, -0.24792959, -0.067731075, 0.13146006, -0.19333683), target2);
	target2 = MulAdd(nf2, MF4x4(0.034206454, 0.1472648, -0.07406727, 0.014654025, 0.18703444, 0.1319857, -0.10610886, 0.08427947, -0.017536618, -0.06487879, -0.12095286, -0.050414838, 0.03260879, 0.1558894, -0.031887084, 0.11840288), target2);
	target2 = MulAdd(ng2, MF4x4(0.114811294, -0.14574333, -0.09392587, 0.042283528, 0.08919092, 0.18259068, 0.0980717, 0.21024778, -0.1280008, -0.027260462, -0.1129027, 0.18722472, 0.13733985, 0.047153983, 0.030871978, 0.1998385), target2);
	target2 = MulAdd(nh2, MF4x4(-0.06783575, 0.004612595, 0.1153467, -0.11531557, -0.048889533, 0.07673577, -0.02041786, 0.22744459, -0.13092506, 0.13484807, 0.40003043, -0.053706612, -0.16985156, -0.04791236, -0.052443005, -0.08363625), target2);
	target2 = MulAdd(ni2, MF4x4(0.18187882, 0.017893985, 0.17856054, 0.005413129, 0.014147176, 0.15102178, 0.12436294, -0.02176765, -0.16727823, -0.0364111, 0.17074408, 0.12899421, 0.31984514, -0.0072070034, 0.031895883, -0.1991405), target2);

	MF3 target3 = tex5.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.003730466, -0.024648283, -0.022169832, -0.0062762927, 0.022062732, 0.032966793, 0.016349113, 0.017197203, 0.020952817, -0.1763789, 0.035497356, 0.053835396), target3);
	target3 = MulAdd(e2, MF4x3(0.020886675, -0.07054202, -0.079142675, 0.06664387, 0.044960167, 0.042230908, -0.095019594, 0.012421141, 0.0142890485, 0.056814816, -0.012751135, -0.014684506), target3);
	target3 = MulAdd(ne1, MF4x3(0.011765893, 0.0008920681, -0.0018258415, -0.010473814, -0.023085753, -0.028783914, -0.023034256, -0.0024786016, -0.0052162083, 0.1643386, -0.06132718, -0.09289065), target3);
	target3 = MulAdd(ne2, MF4x3(0.016597198, 0.09389637, 0.10833379, -0.043163072, -0.04714812, -0.035274632, 0.09634976, -0.009292612, -0.022424143, -0.08765172, 0.0051558353, 0.010900356), target3);

	tex3[gxy] = target1;
	tex4[gxy] = target2;
	tex6[gxy] = MF4(target3, 1);
}


//!PASS 7
//!DESC Conv-4x3x3x16
//!IN tex3, tex4, tex6
//!OUT tex1, tex2, tex5
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass7(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex3.SampleLevel(sam, pos, 0);
	MF4 f1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex4.SampleLevel(sam, pos, 0);
	MF4 f2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.02981662, -0.26338395, -0.011632586, 0.15063232 };
	target1 = MulAdd(a1, MF4x4(-0.082203194, 0.021720003, 0.03725474, -0.08048348, 0.2063248, -0.033020593, -0.17585336, 0.06476272, 0.012244563, 0.026554609, 0.014708393, 0.26606125, 0.14248778, 0.12817341, -0.039826933, -0.12751861), target1);
	target1 = MulAdd(b1, MF4x4(0.24573852, 0.19695967, -0.06257417, -0.04782871, 0.3511875, -0.018083302, -0.077342674, 0.15247667, 0.20321761, -0.07479984, -0.09548503, 0.08109568, -0.23808748, 0.07246303, -0.004242619, 0.16162953), target1);
	target1 = MulAdd(c1, MF4x4(0.13296306, 0.19495387, 0.009222276, 0.033592198, 0.20443891, 0.16063854, -0.2581601, -0.016132578, -0.2296461, -0.23647323, -0.15407176, -0.18265317, 0.2343241, -0.049697313, -0.09398783, 0.41931856), target1);
	target1 = MulAdd(d1, MF4x4(-0.10866088, -0.40605694, -0.0042648134, 0.07943803, 0.26914695, 0.14816476, 0.037706107, -0.123223364, -0.19962949, -0.053534556, -0.08397409, -0.04244924, -0.075791344, 0.29629225, 0.2311928, 0.099177904), target1);
	target1 = MulAdd(e1, MF4x4(-0.1748319, -0.2003186, -0.32659066, -0.21007413, 0.20122464, 0.032196607, -0.026299698, 0.33395135, 0.11411664, 0.05971959, 0.09001304, -0.15936212, 0.012322024, 0.19936106, -0.411186, -0.08319479), target1);
	target1 = MulAdd(f1, MF4x4(-0.07349218, 0.006184436, 0.096199185, -0.050186496, 0.064047046, -0.03813128, -0.057007037, -0.025550695, -0.2863145, -0.008512981, -0.20615962, 0.18009211, 0.008298396, 0.22452813, 0.010843521, 0.20169461), target1);
	target1 = MulAdd(g1, MF4x4(0.2691149, 0.059546687, 0.08922005, 0.2252196, 0.30341956, -0.024489028, 0.087045394, -0.03856442, -0.14083561, -0.17683443, 0.14137806, 0.15520614, 0.2073925, -0.19525874, 0.23661858, 0.3098405), target1);
	target1 = MulAdd(h1, MF4x4(0.006530723, 0.04180736, -0.04762067, -0.064395495, 0.02396811, -0.13332283, 0.0037775645, 0.026309434, 0.0033065109, -0.08315753, 0.02917419, 0.12330464, 0.22819455, -0.07489677, 0.12829056, -0.097994626), target1);
	target1 = MulAdd(i1, MF4x4(-0.09983759, 0.032783493, 0.11085758, 0.08993078, -0.057110567, -0.018973934, -0.14946178, -0.03921629, 0.039757587, 0.015860094, 0.04989561, -0.19634786, 0.04351146, 0.019315343, 0.25972188, 0.17989321), target1);
	target1 = MulAdd(a2, MF4x4(-0.04111906, -0.165601, 0.0003682197, -0.056232415, -0.32716644, -0.24015541, -0.057547837, 0.05966729, 0.06854747, 0.03599213, -0.18798864, 0.1183447, 0.014268468, -0.1310834, 0.06415977, -0.19414157), target1);
	target1 = MulAdd(b2, MF4x4(-0.00070661673, 0.17671427, 0.10584568, -0.060910843, -0.104282066, -0.22676118, -0.01907062, 0.24882245, -0.043454725, 0.07691623, -0.48371696, 0.013537671, -0.025488405, 0.061228953, 0.18548754, 0.028671112), target1);
	target1 = MulAdd(c2, MF4x4(-0.0121596735, 0.09595702, -0.08244918, -0.1176173, 0.26773354, -0.021729136, 0.075465776, -0.0928876, 0.12461298, 0.16830076, -0.15302569, 0.113850676, 0.09811088, 0.13006307, 0.24999009, 0.10261325), target1);
	target1 = MulAdd(d2, MF4x4(-0.032246377, 0.038265374, -0.26476422, -0.1442876, -0.19866082, 0.08649541, 0.041478764, 0.11155026, 0.21576422, -0.09572912, -0.11174068, -0.19722937, -0.15801935, 0.29604745, -0.08606268, -0.15532136), target1);
	target1 = MulAdd(e2, MF4x4(-0.06315591, 0.16151646, -0.009230362, -0.04341246, 0.09085519, 0.21924476, 0.38044852, 0.193819, 0.16622902, 0.0025134624, -0.22688466, -0.025276015, 0.07714917, 0.16302192, -0.11767101, -0.11086476), target1);
	target1 = MulAdd(f2, MF4x4(-0.04170153, 0.001859292, -0.26352355, 0.10982333, -0.031867817, 0.15773517, -0.060263418, 0.11117763, -0.017359972, 0.0127261225, 0.0782802, -0.16908924, 0.080516845, -0.05691526, -0.07530135, -0.14553802), target1);
	target1 = MulAdd(g2, MF4x4(0.06112685, -0.032287434, 0.17445667, -0.044935808, -0.11449107, -0.051394563, -0.029589338, -0.14555557, 0.03440661, 0.11035615, -0.17175, -0.14851089, 0.037362, -0.18740481, 0.17278154, 0.18073405), target1);
	target1 = MulAdd(h2, MF4x4(-0.27670652, 0.19484822, 0.2609349, 0.1455016, 0.04438468, 0.1449185, 0.11185832, -0.18598269, -0.019846648, 0.11886126, -0.098498635, 0.15737785, 0.011406795, -0.18860829, -0.13705735, 0.17535745), target1);
	target1 = MulAdd(i2, MF4x4(-0.30244905, -0.28695273, 0.1146976, 0.21144345, -0.037980128, -0.027679864, -0.13992494, -0.04884521, -0.032023884, -0.07921183, -0.16042095, -0.06935386, -0.06570237, -0.1107404, -0.018163798, 0.22625941), target1);
	target1 = MulAdd(na1, MF4x4(-0.07292955, -0.07321777, -0.045146503, -0.33291966, -0.096732594, -0.07203495, 0.33692798, 0.2870733, 0.122160144, -0.076574564, 0.042844944, 0.26448342, 0.07672146, -0.028775277, -0.12088313, 0.15583947), target1);
	target1 = MulAdd(nb1, MF4x4(0.21589327, 0.05258274, 0.09705794, -0.024653846, -0.039402515, 0.28485695, 0.14711736, -0.10556087, -0.15140481, 0.09039498, 0.017308712, 0.11862922, 0.08230978, 0.21678248, -0.043815188, -0.226433), target1);
	target1 = MulAdd(nc1, MF4x4(-0.029258793, 0.26618922, 0.02564014, -0.23189862, -0.24074338, -0.18556763, 0.25973624, 0.04746873, 0.0137007125, -0.22239363, -0.12414957, 0.048228756, -0.22406264, 0.282667, -0.021001073, -0.17465611), target1);
	target1 = MulAdd(nd1, MF4x4(0.32401654, -0.1495363, -0.20869227, 0.04271639, -0.0087802755, 0.031325378, 0.23834595, 0.039336167, 0.17265107, 0.20947595, 0.28737286, 0.0028783784, -0.057340365, -0.050347418, -0.11915604, -0.1831807), target1);
	target1 = MulAdd(ne1, MF4x4(0.1811338, 0.07732653, 0.20975596, -0.47129005, 0.07121942, 0.08410583, 0.44170937, -0.19524159, -0.17807977, 0.12837476, 0.20816846, -0.1741958, -0.04411918, 0.06024972, 0.18159702, -0.052485272), target1);
	target1 = MulAdd(nf1, MF4x4(-0.15229738, 0.27513, 0.28150418, -0.19543962, -0.02045864, -0.07207227, 0.09589587, 0.09110817, 0.061413247, 0.0046052113, 0.11619411, -0.2988938, 0.065739445, 0.10205611, 0.12847126, -0.028355654), target1);
	target1 = MulAdd(ng1, MF4x4(0.0657154, -0.047568597, -0.16148911, 0.16392621, -0.25281775, -0.061153214, 0.017480455, -0.026288848, 0.20319715, 0.04763355, 0.010444491, -0.26671803, -0.25821987, 0.32863674, -0.30734694, -0.18190521), target1);
	target1 = MulAdd(nh1, MF4x4(-0.042703815, 0.06633036, -0.048434302, -0.17176376, -0.12699759, -0.1124558, 0.083266065, 0.03354623, -0.13468939, 0.12706263, 0.053659134, -0.06930602, 0.008196115, 0.2034998, -0.06351442, -0.039730288), target1);
	target1 = MulAdd(ni1, MF4x4(0.09614661, 0.22500272, 0.088511504, -0.16960482, 0.15364788, -0.18854137, -0.13163191, -0.07503735, -0.23177068, -0.0053305267, -0.041978605, 0.0971947, -0.049034655, 0.04486706, 0.09076307, -0.02310868), target1);
	target1 = MulAdd(na2, MF4x4(-0.1304683, 0.17743458, -0.09817326, -0.0646786, 0.07886976, 0.20109388, -0.034114968, -0.2029261, -0.03348398, 0.029337432, -0.07302782, -0.02240758, 0.030242773, -0.30032325, 0.02085572, -0.027314361), target1);
	target1 = MulAdd(nb2, MF4x4(-0.037377544, 0.026350772, -0.07430488, -0.114671774, -0.126935, -0.046512567, -0.033628833, -0.19018382, -0.041053895, -0.031206857, 0.08562848, -0.01875709, 0.21099389, -0.092511, 0.0073047103, -0.009811013), target1);
	target1 = MulAdd(nc2, MF4x4(0.11358029, 0.17468451, -0.12739041, -0.14332245, -0.22230148, 0.16862972, -0.04462456, 0.2469604, -0.008622369, 0.0081848325, -0.17032363, -0.16024362, 0.21178265, 0.037127133, 0.08559072, 0.11584694), target1);
	target1 = MulAdd(nd2, MF4x4(0.008993893, -0.08037705, 0.4426555, 0.15593371, 0.15273719, -0.03249998, 0.055109, -0.1512612, -0.037183985, 0.20825677, -0.08516227, -0.06664223, -0.10011001, -0.3505215, -0.17941694, 0.052089088), target1);
	target1 = MulAdd(ne2, MF4x4(-0.109703645, -0.13505603, 0.1336451, 0.13118869, 0.010915504, 0.12748592, 0.21201555, -0.40841985, -0.11059143, 0.033772044, -0.039282143, 0.03095394, 0.10394723, -0.21343367, -0.10699851, -0.028351074), target1);
	target1 = MulAdd(nf2, MF4x4(0.019704714, 0.06243651, 0.09896519, -0.17492259, 0.012675787, -0.004239029, 0.21319824, 0.069183126, -0.0071114586, 0.123431124, -0.24479835, 0.00723795, -0.045293927, 0.014101029, 0.15746681, 0.042405806), target1);
	target1 = MulAdd(ng2, MF4x4(0.023828225, -0.0015190929, 0.1194638, 0.082163885, 0.10532113, 0.042044062, 0.02528007, 0.015175004, 0.026613194, 0.33525538, -0.1627064, -0.29887968, -0.197707, 0.038967777, -0.15811683, -0.106895216), target1);
	target1 = MulAdd(nh2, MF4x4(0.044362027, -0.04946742, -0.14815849, -0.17660522, -0.034201477, -0.012243106, -0.050183997, 0.06407372, 0.039822515, 0.15880872, -0.0672721, -0.4081093, 0.019489579, -0.060278706, -0.015096743, -0.07799167), target1);
	target1 = MulAdd(ni2, MF4x4(0.11861756, 0.27113584, -0.14107186, -0.10246008, -0.124051, -0.1627854, 0.10698585, 0.2846401, -0.061731786, 0.1724438, -0.12428688, -0.09986041, -0.034171514, -0.07100923, 0.041739646, -0.11308375), target1);

	MF4 target2 = { 0.18765923, -0.07697714, 0.028134674, -0.060966115 };
	target2 = MulAdd(a1, MF4x4(0.17082009, 0.031344634, -0.06131912, 0.00887183, -0.01528174, 0.12943709, 0.24537678, 0.008178781, -0.312396, -0.023583878, 0.07827866, -0.1231261, 0.15081584, -0.18161978, -0.25179705, -0.036934935), target2);
	target2 = MulAdd(b1, MF4x4(-0.05768411, 0.16785417, -0.1788644, -0.0067257965, 0.021445744, 0.10066516, -0.23864186, 0.1450302, 0.12892793, 0.19856106, -0.24444748, 0.16531628, -0.044425935, -0.02775357, 0.009059946, -0.12958384), target2);
	target2 = MulAdd(c1, MF4x4(-0.025798557, -0.17238182, -0.34056288, -0.20921059, -0.03576266, 0.1476854, -0.06264234, 0.14452787, -0.04130045, -0.07275762, 0.034578666, 0.2914669, 0.20879944, 0.21359251, -0.048695553, 0.2638088), target2);
	target2 = MulAdd(d1, MF4x4(-0.022791177, 0.4204545, 0.116855636, 0.20241925, -0.010444933, -0.14462502, 0.022550104, -0.24423064, -0.09417524, 0.045358784, -0.11405829, 0.035979558, -0.2283092, -0.06670842, -0.23852053, -0.22417003), target2);
	target2 = MulAdd(e1, MF4x4(-0.14526704, 0.040880535, 0.14076385, 0.07795045, -0.059177604, -0.13056375, -0.3373641, -0.19344307, -0.29891858, -0.32578763, -0.29061425, 0.1562214, -0.13578376, 0.36586633, 0.24936736, 0.054629393), target2);
	target2 = MulAdd(f1, MF4x4(-0.025790233, -0.13020341, -0.10084969, 0.15767297, -0.09738769, 0.04034404, 0.0038675873, 0.043515608, 0.16899958, -0.29117966, 0.03420067, 0.14432564, -0.10473084, 0.21014084, 0.07775908, -0.09303797), target2);
	target2 = MulAdd(g1, MF4x4(-0.07443987, -0.16225167, 0.036251917, 0.028432872, 0.03759333, 0.004027401, -0.033941846, 0.0019474924, 0.02357054, 0.30748722, 0.1652115, -0.17361522, 0.16905582, 0.08048018, -0.23639561, -0.029408466), target2);
	target2 = MulAdd(h1, MF4x4(0.0461233, -0.09346199, -0.07063276, -0.19447634, -0.049339604, -0.0032855074, -0.22661209, -0.0543389, 0.11924857, -0.21691081, -0.1645725, -0.0075736847, 0.018572787, -0.06552861, -0.01777661, -0.11651732), target2);
	target2 = MulAdd(i1, MF4x4(-0.06425901, 0.123392984, -0.16395192, -0.093448035, -0.029316641, 0.0986573, -0.23135012, 0.011170849, 0.00023920486, 0.15296175, 0.35453254, -0.05189021, 0.20708887, -0.103900835, 0.081992395, -0.21829562), target2);
	target2 = MulAdd(a2, MF4x4(-0.019074136, -0.1572586, 0.27919227, 0.09119617, 0.035954695, 0.2941489, 0.18262725, -0.055522963, -0.21364328, -0.1573611, 0.104966134, 0.08228523, 0.19945285, -0.0039229114, -0.1565048, 0.028975379), target2);
	target2 = MulAdd(b2, MF4x4(-0.18501253, 0.006473006, 0.06637501, 0.04295065, 0.06411007, 0.1166344, -0.10060226, 0.46296063, -0.08600344, -0.03560105, 0.012215349, 0.017885283, 0.061346993, 0.17336361, 0.01935021, 0.20198092), target2);
	target2 = MulAdd(c2, MF4x4(-0.04451627, -0.10372061, -0.13968691, 0.14479733, 0.1660607, 0.19334625, 0.0085214665, 0.28863636, -0.07600901, -0.014777084, 0.13209191, -0.09045013, 0.104893915, -0.04776884, -0.007936376, 0.104568765), target2);
	target2 = MulAdd(d2, MF4x4(0.023751335, -0.108048, -0.050531313, 0.15916029, 0.13246661, 0.04644228, -0.09586482, -0.17222965, -0.22898191, -0.033484615, 0.078883134, -0.052609313, -0.2721741, 0.045986425, 0.13972299, -0.28923607), target2);
	target2 = MulAdd(e2, MF4x4(-0.23364568, -0.008875902, -0.40894926, 0.060443908, -0.2839635, -0.5270991, -0.2500865, 0.002020195, -0.24488612, -0.04982319, -0.009110353, -0.018023955, 0.06647274, -0.25225738, 0.26154432, -0.033934146), target2);
	target2 = MulAdd(f2, MF4x4(-0.1535129, -0.21257545, -0.16553773, 0.17471452, -0.06203719, 0.15238857, 0.18702018, 0.18572305, 0.07740396, -0.074217625, -0.072156586, -0.2183728, 0.00403749, 0.13750519, 0.30362993, 0.06550286), target2);
	target2 = MulAdd(g2, MF4x4(0.37164542, -0.1980723, -0.15659203, 0.19498909, 0.01748114, 0.011807152, -0.05424202, 0.11926474, 0.050406165, -0.12925303, -0.020280985, 0.08429331, 0.14769496, -0.077555746, -0.15216178, -0.27070466), target2);
	target2 = MulAdd(h2, MF4x4(0.35804263, 0.08539285, -0.14785156, -0.13532467, 0.058254432, 0.20448379, -0.006173341, 0.058168225, -0.21714899, -0.13472849, -0.09392532, -0.12753737, -0.097461835, -0.11419082, 0.09384189, 0.06414768), target2);
	target2 = MulAdd(i2, MF4x4(0.023494452, -0.22187226, -0.16694295, 0.0204334, -0.26720086, 0.15916729, 0.3098874, -0.10292057, 0.008854983, 0.13375004, -0.04409455, 0.09286524, 0.095829524, 0.12427317, -0.048659876, 0.18300754), target2);
	target2 = MulAdd(na1, MF4x4(-0.119153984, 0.10163183, 0.025017537, -0.40096784, 0.026778705, 0.15821172, -0.19947284, -0.33337715, 0.2952563, 0.16820388, -0.057061996, -0.029319009, -0.12184868, 0.09031512, 0.12028806, 0.021044692), target2);
	target2 = MulAdd(nb1, MF4x4(0.086744264, -0.046958666, 0.2130253, -0.46672252, 0.07135636, 0.0100029735, -0.13828261, -0.012365689, -0.11374441, 0.21084632, -0.059631422, -0.013799735, -0.037889663, -0.10701892, -0.09493782, 0.15516634), target2);
	target2 = MulAdd(nc1, MF4x4(0.031181194, -0.01535001, 0.029270316, 0.13128386, 0.11838377, -0.17051528, 0.12228499, -0.04841128, 0.33350074, -0.006144013, -0.09055018, 0.27470216, -0.26665646, -0.08703671, -0.01719071, -0.23449609), target2);
	target2 = MulAdd(nd1, MF4x4(-0.12856458, 0.005562174, -0.19517267, 0.13270985, 0.2776414, 0.032003902, -0.15778573, 0.15344355, 0.26930434, -0.13459459, 0.035019353, 0.08896612, 0.12847935, -0.122637205, 0.001815178, 0.08290523), target2);
	target2 = MulAdd(ne1, MF4x4(0.33805037, -0.15318587, -0.20955376, -0.26121393, -0.026022578, -0.1617741, 0.1336867, 0.026223289, 0.012059392, -0.17295446, -0.060811974, 0.14027825, -0.21134059, -0.08408573, -0.23773228, 0.110836074), target2);
	target2 = MulAdd(nf1, MF4x4(0.16176093, 0.15307428, -0.07711325, -0.3458805, 0.061291527, 0.023916256, 0.21370678, 0.0015756418, 0.10642374, 0.24807373, 0.11164451, 0.10780487, 0.087194376, -0.2718231, -0.008457387, 0.054078236), target2);
	target2 = MulAdd(ng1, MF4x4(-0.03259038, -0.20923306, 0.165477, 0.098864526, -0.02734457, 0.08871225, -0.01552188, 0.047712058, 0.055032052, -0.13044262, -0.2899521, 0.22230095, -0.029343741, -0.16427459, -0.005436118, -0.05111821), target2);
	target2 = MulAdd(nh1, MF4x4(0.20065974, -0.1556366, -0.12620135, 0.44572976, -0.020925352, 0.12025185, 0.20588058, 0.06391864, 0.046870507, 0.16942503, -0.049370963, 0.008779016, 0.04954915, 0.090298936, -0.16466027, 0.011152038), target2);
	target2 = MulAdd(ni1, MF4x4(0.13587528, 0.047841422, 0.19804007, -0.1672396, -0.072491, 0.04543739, 0.25287256, 0.015226213, 0.02007356, -0.049578942, -0.08796175, 0.1714897, -0.07819061, 0.1509537, 0.093094915, 0.031139288), target2);
	target2 = MulAdd(na2, MF4x4(-0.013774682, 0.118201815, -0.009592314, -0.10837201, -0.0686881, -0.083380274, 0.107689425, 0.046642892, 0.119898744, -0.05502989, -0.19719897, 0.0005697584, -0.0921928, 0.032281205, 0.2568853, 0.2325449), target2);
	target2 = MulAdd(nb2, MF4x4(0.02991112, -0.09898633, 0.06076172, -0.20906185, 0.0026118348, 0.06130956, 0.06760944, -0.16662054, 0.065741204, -0.13144116, 0.011419801, 0.22552124, 0.1465757, -0.07417319, -0.10788749, -0.24952699), target2);
	target2 = MulAdd(nc2, MF4x4(-0.19238451, -0.024058497, 0.19580396, -0.067399554, -0.18832864, -0.11752747, -0.078949094, -0.23762032, -0.04141864, 0.022530237, -0.02222157, 0.0054874527, 0.057746816, -0.34854797, 0.028730657, -0.08976777), target2);
	target2 = MulAdd(nd2, MF4x4(0.16888975, 0.19949849, -0.08456147, -0.03619044, -0.019596824, 0.11214634, 0.13971676, 0.22926724, 0.03219445, -0.04566354, -0.14948955, -0.22817011, -0.08714846, -0.19684613, 0.15479128, 0.2433362), target2);
	target2 = MulAdd(ne2, MF4x4(0.16050309, -0.102841675, 0.20855242, -0.011171905, -0.10309409, 0.22455123, 0.15892951, -0.06582373, 0.010079549, -0.2055006, -0.09385158, 0.006519388, 0.11838815, 0.37134558, -0.165772, 0.12704434), target2);
	target2 = MulAdd(nf2, MF4x4(0.11643292, 0.03294274, -0.09800525, -0.13601723, -0.081318736, -0.059975546, -0.039105035, -0.2893635, -0.13024913, -0.058016162, -0.09961072, 0.10532414, 0.24250132, -0.35546342, -0.092634924, 0.093994915), target2);
	target2 = MulAdd(ng2, MF4x4(-0.18799333, 0.25611782, 0.014645917, -0.063751906, 0.06498416, 0.16619027, -0.14411639, 0.3914421, -0.07343631, -0.116468735, -0.10941946, -0.2553544, -0.37774643, -0.0018441634, 0.06827239, -0.0122299045), target2);
	target2 = MulAdd(nh2, MF4x4(-0.11884597, -0.2477297, 0.048488285, -0.06438257, -0.124703035, 0.25932777, 0.0650111, -0.0930877, 0.06463341, -0.000544085, 0.0147504965, -0.170097, -0.13241997, 0.20983136, -0.15956205, 0.03424298), target2);
	target2 = MulAdd(ni2, MF4x4(-0.034574904, 0.06755256, 0.09508443, -0.17162292, 0.046379335, 0.2178781, 0.08699012, -0.055380464, -0.2237568, -0.07427848, -0.028395249, -0.3225617, -0.084454566, -0.24776657, 0.254169, 0.13229847), target2);

	MF3 target3 = tex6.SampleLevel(sam, pos, 0).rgb;
	target3 = MulAdd(e1, MF4x3(0.030815786, 0.021069322, 0.01812191, 0.084839165, -0.0080813095, -0.029270556, -0.10456346, 0.062386703, 0.0665605, 0.11926609, -0.1104228, -0.13291118), target3);
	target3 = MulAdd(e2, MF4x3(-0.07159541, -0.007267032, -0.010134558, 0.008234213, 0.045609634, 0.040295456, 0.018416971, 0.01308482, 0.014649557, 0.035107512, -0.02140815, -0.030279048), target3);
	target3 = MulAdd(ne1, MF4x3(0.01918586, 0.03875863, 0.03229402, -0.07917104, 0.041135103, 0.057182517, 0.08609541, 0.0079662455, 0.004327576, -0.14332893, 0.03120354, 0.056732506), target3);
	target3 = MulAdd(ne2, MF4x3(0.03200192, -0.0035752193, -0.0031064528, -0.010902813, 0.014607456, 0.019431474, -0.016461229, -0.004938204, -0.004655488, -0.033470232, 0.0026075812, 0.005896968), target3);

	tex1[gxy] = target1;
	tex2[gxy] = target2;
	tex5[gxy] = MF4(target3, 1);
}


//!PASS 8
//!DESC Conv-4x3x3x16, Conv-3x1x1x112
//!IN INPUT, tex1, tex2, tex5
//!OUT OUTPUT
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass8(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	
	const uint2 outputSize = GetOutputSize();
	if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	MF4 a1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e1 = tex1.SampleLevel(sam, pos, 0);
	MF4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na1 = max(-a1, 0);
	MF4 nb1 = max(-b1, 0);
	MF4 nc1 = max(-c1, 0);
	MF4 nd1 = max(-d1, 0);
	MF4 ne1 = max(-e1, 0);
	MF4 nf1 = max(-f1, 0);
	MF4 ng1 = max(-g1, 0);
	MF4 nh1 = max(-h1, 0);
	MF4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);

	MF4 a2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, -inputPt.y), 0);
	MF4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	MF4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	MF4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	MF4 e2 = tex2.SampleLevel(sam, pos, 0);
	MF4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	MF4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	MF4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	MF4 i2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0);

	MF4 na2 = max(-a2, 0);
	MF4 nb2 = max(-b2, 0);
	MF4 nc2 = max(-c2, 0);
	MF4 nd2 = max(-d2, 0);
	MF4 ne2 = max(-e2, 0);
	MF4 nf2 = max(-f2, 0);
	MF4 ng2 = max(-g2, 0);
	MF4 nh2 = max(-h2, 0);
	MF4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);

	MF4 target1 = { -0.19233678, 0.016725872, -0.008011114, -0.1977463 };
	target1 = MulAdd(a1, MF4x4(0.21919365, 0.36627784, 0.12603314, 0.24306288, 0.06447028, 0.06472204, -0.05997039, -0.15651788, 0.017059859, -0.006497198, -0.4189735, 0.021636713, -0.23887977, -0.014220949, 0.031113686, -0.17342716), target1);
	target1 = MulAdd(b1, MF4x4(-0.10818789, -0.03273837, 0.33918005, -0.19290088, 0.0955361, -0.34107623, -0.054906327, -0.18083344, -0.060723677, 0.24395694, 0.112975016, -0.07254578, -0.14389384, 0.13235968, -0.15054801, -0.26216486), target1);
	target1 = MulAdd(c1, MF4x4(-0.23442148, -0.07857079, 0.022283873, -0.2656417, 0.037092753, -0.037313666, -0.5057047, 0.042533103, -0.120424, 0.00021930189, -0.0044566668, -0.45536995, 0.00040759926, 0.14597592, -0.094990164, -0.036161344), target1);
	target1 = MulAdd(d1, MF4x4(0.15024352, 0.19903262, -0.0734784, 0.092836305, -0.025753846, 0.024750374, -0.07550193, 0.035420835, 0.11084378, 0.26119822, -0.08443512, -0.0047807065, -0.042685136, 0.24889739, 0.098650105, 0.2088369), target1);
	target1 = MulAdd(e1, MF4x4(-0.25551823, 0.14455976, 0.19886157, -0.23465924, 0.20711218, -0.20875362, -0.11320392, -0.30852005, -0.06795657, 0.008670962, 0.30601278, 0.6929064, 0.17079145, 0.15744895, 0.06441601, 0.06514001), target1);
	target1 = MulAdd(f1, MF4x4(0.03142604, -0.006410137, -0.023654792, -0.05708553, 0.062985405, -0.077010594, 0.078804865, 0.050882503, 0.010274228, -0.15558401, 0.09490256, 0.14964707, -0.11966925, -0.36176664, 0.27809814, -0.18862294), target1);
	target1 = MulAdd(g1, MF4x4(0.05609992, 0.0041612233, -0.08498908, 0.04479823, -0.080117956, -0.17423204, -0.22858045, 0.054569032, -0.050866384, -0.020000307, 0.027000953, -0.67724514, 0.16240878, -0.04641204, 0.0648367, -0.20613132), target1);
	target1 = MulAdd(h1, MF4x4(0.08542306, -0.08254248, -0.11090553, -0.14140448, -0.10788511, -0.13011602, -0.29319742, -0.26007155, 0.11033401, -0.31966573, 0.32668245, 0.19542319, 0.06329418, 0.20904626, 0.2724067, -0.009155685), target1);
	target1 = MulAdd(i1, MF4x4(-0.007403411, 0.0012836396, -0.23446666, -0.03017208, 0.062420018, -0.13611084, -0.2975928, 0.13173148, -0.03679939, 0.13743873, -0.10121899, 0.074514665, 0.1497629, -0.09523838, 0.39018926, 0.37807035), target1);
	target1 = MulAdd(a2, MF4x4(0.11441487, -0.19565523, -0.25757137, -0.16148767, 0.15575317, -0.12657928, 0.10479676, 0.062919036, 0.010544159, 0.22931573, 0.20360178, 0.4637635, -0.3395036, -0.52467215, 0.08759308, 0.028030418), target1);
	target1 = MulAdd(b2, MF4x4(0.2699195, -0.34218305, 0.15259695, 0.03139074, -0.024053533, -0.029567484, 0.28480124, 0.20525953, 0.15452823, -0.217713, 0.15861876, -0.012275699, 0.21408023, 0.097508304, -0.57126766, -0.14679857), target1);
	target1 = MulAdd(c2, MF4x4(-0.0755847, -0.09751562, -0.29480466, -0.22285318, 0.14196442, 0.114573136, -0.22294767, 0.12463806, 0.3322209, -0.04631724, -0.11097061, -0.27986854, -0.16099304, -0.060079545, 0.00299308, 0.120776065), target1);
	target1 = MulAdd(d2, MF4x4(0.050933484, -0.13776319, -0.18809728, 0.24035202, -0.32528606, -0.41684148, -0.029342847, 0.28642926, -0.07963454, -0.12905268, 0.07606093, 0.24670005, -0.08815598, -0.23320907, -0.008099349, 0.21512873), target1);
	target1 = MulAdd(e2, MF4x4(0.19247563, 0.18083979, -0.09719762, 0.15314941, -0.22350982, 0.46515045, -0.3571128, 0.35953265, 0.06921985, -0.4482386, -0.18732521, -0.5043983, 0.35159567, -0.33315298, -0.21884166, -0.16283798), target1);
	target1 = MulAdd(f2, MF4x4(-0.021124054, -0.007966742, 0.0052493825, 0.022550896, 0.030403977, 0.3377868, -0.47602004, -0.077664234, -0.07222509, -0.07486097, -0.37971064, -0.5107857, -0.06299477, 0.04930232, -0.3330487, 0.29845512), target1);
	target1 = MulAdd(g2, MF4x4(-0.063705474, -0.07917637, -0.02026607, -0.05142568, 0.021577014, -0.07379867, 0.033937998, 0.08148773, -0.02717838, -0.03233838, 0.098000035, 0.036476444, -0.13366953, 0.014477577, 0.24064232, 0.39313284), target1);
	target1 = MulAdd(h2, MF4x4(-0.16046515, -0.094624564, 0.35435164, 0.09942324, -0.07137174, -0.27999225, 0.124644354, -0.0062176553, 0.015016751, -0.05500243, -0.23249559, -0.4508382, 0.1860433, 0.10671491, -0.033345353, -0.06611453), target1);
	target1 = MulAdd(i2, MF4x4(0.21614046, -0.01307525, -0.18941112, -0.20533535, -0.14481686, -0.47801897, 0.22605121, -0.20298961, -0.06744227, -0.20377496, -0.11926173, 0.15645133, -0.31570885, -0.3495616, -0.024666889, 0.040965475), target1);
	target1 = MulAdd(na1, MF4x4(-0.11748018, -0.039976366, -0.00084064255, -0.028653437, -0.16216733, -0.036768105, 0.018064514, -0.0928936, 0.14008482, -0.064511225, 0.24329947, -0.0268608, 0.050330248, 0.08540601, -0.07272679, -0.01187671), target1);
	target1 = MulAdd(nb1, MF4x4(-0.09459936, -0.011723822, -0.06952858, -0.07808506, -0.065588176, 0.332501, -0.0120042395, 0.07668016, 0.14735217, -0.14856043, -0.06702449, -0.020953184, -0.023006834, 0.06135422, 0.1491448, -0.028061569), target1);
	target1 = MulAdd(nc1, MF4x4(0.25136968, 0.25146323, -0.108277924, -0.20407207, -0.0013780294, 0.16108194, 0.25143847, 0.06672421, -0.033905584, -0.021144686, -0.019152718, 0.34619498, 0.14560962, 0.034437314, 0.024790365, -0.049976267), target1);
	target1 = MulAdd(nd1, MF4x4(-0.24928351, 0.12637813, 0.23609994, 0.12722939, -0.036997862, -0.16554876, 0.11144095, -0.10040036, -0.020359103, -0.080701865, -0.3142192, 0.27257237, 0.13546956, -0.14416885, 0.028196262, -0.2886465), target1);
	target1 = MulAdd(ne1, MF4x4(0.28524777, -0.4236231, 0.27420738, -0.21095508, 0.23475651, 0.115876295, -0.18837357, -0.0260708, 0.030670704, -0.11516913, -0.11365572, -0.2203149, -0.018612983, -0.10719593, -0.031727783, 0.1403327), target1);
	target1 = MulAdd(nf1, MF4x4(0.07240512, 0.03139215, 0.12328737, -0.021201206, -0.13971715, 0.072742075, -0.0011289873, 0.0053133667, 0.035639685, -0.04322272, -0.19288473, -0.15812221, -0.19126481, 0.0698514, 0.17619178, -0.035605464), target1);
	target1 = MulAdd(ng1, MF4x4(-0.18552057, 0.07259671, 0.011667668, -0.15630563, 0.11414356, 0.14482655, -0.04021029, 0.18495587, -0.11386139, -0.09058561, -0.011265998, 0.23358451, 0.0521358, 0.12495261, 0.021644838, -0.048094347), target1);
	target1 = MulAdd(nh1, MF4x4(-0.09222373, 0.0533347, 0.055820454, 0.22382596, 0.18713981, 0.2668916, -0.019384036, 0.012698582, 0.13325234, 0.20361474, -0.33106443, -0.08571572, -0.21243028, -0.10996386, 0.123459645, 0.1534967), target1);
	target1 = MulAdd(ni1, MF4x4(0.18133277, 0.18108074, -0.05638664, 0.29533157, -0.2108019, -0.033636626, 0.5015888, -0.15116066, -0.041320793, -0.14764231, 0.07314567, -0.18865979, 0.10276937, 0.094240844, -0.1364283, 0.27812913), target1);
	target1 = MulAdd(na2, MF4x4(0.06040915, 0.23753685, 0.19019844, 0.23948252, -0.07535012, 0.11848904, 0.14389765, 0.050067905, 0.16150077, -0.030053454, 0.12478255, 0.26020208, 0.111198805, 0.06787492, -0.12771018, 0.006687384), target1);
	target1 = MulAdd(nb2, MF4x4(-0.5421617, 0.10414128, -0.21526064, -0.08883624, 0.13145073, -0.29695904, 0.57386386, 0.073361695, -0.09538372, 0.27593842, 0.070922814, 0.21769938, 0.06214975, 0.11847816, 0.10033405, 0.29360098), target1);
	target1 = MulAdd(nc2, MF4x4(-0.16294672, -0.014815565, 0.22046989, 0.16858687, 0.058917344, 0.21384977, 0.18803519, 0.105688855, 0.0355118, 0.20571202, -0.07341922, 0.26624045, -0.0415102, 0.050942056, 0.19727907, 0.20122413), target1);
	target1 = MulAdd(nd2, MF4x4(-0.020470422, 0.15815964, -0.13437317, -0.1967045, 0.074902646, 0.08356444, 0.055913117, -0.12837863, -0.18647918, 0.07002247, 0.038864706, -0.07288784, 0.04135125, -0.016055549, -0.1340297, -0.15578008), target1);
	target1 = MulAdd(ne2, MF4x4(-0.07685624, 0.00079105416, -0.068755336, 0.110282525, -0.014170752, 0.041282844, -0.17035173, 0.19439398, -0.3036256, 0.024148455, -0.19566648, -0.06736254, 0.14203559, -0.13016985, -0.32845357, -0.14266774), target1);
	target1 = MulAdd(nf2, MF4x4(0.0087252045, 0.098839566, -0.08770506, -0.08499465, 0.015245115, -0.110854514, 0.054458305, -0.018121868, -0.09666134, -0.08316006, 0.24617113, -0.17195955, 0.2574254, 0.06734342, -0.13792352, -0.07306126), target1);
	target1 = MulAdd(ng2, MF4x4(-0.0073954533, -0.20126835, -0.22545357, -0.29462856, 0.057408337, 0.11939119, -0.01846476, 0.12534486, 0.15751605, -0.14282645, -0.14219986, 0.14283386, 0.14090413, 0.10500912, 0.03039335, 0.17448832), target1);
	target1 = MulAdd(nh2, MF4x4(0.043910783, -0.09140025, -0.21666165, 0.07616939, 0.104454786, 0.309926, -0.12906921, 0.1140117, 0.09372434, 0.049547072, -0.086615674, -0.034449168, 0.096705064, 0.26001686, 0.027063297, 0.12422948), target1);
	target1 = MulAdd(ni2, MF4x4(0.1365422, 0.2679611, 0.12037257, 0.43346113, 0.08223084, -0.016788265, 0.13570398, -0.017974345, -0.17922844, -0.09475725, 0.073539585, -0.106947675, 0.08998511, 0.04133868, 0.16586913, -0.26291734), target1);

	MF4 target2 = { 0.26886886, 0.05874665, 0.10268232, 0.05833081 };
	target2 = MulAdd(a1, MF4x4(-0.36016628, 0.019064043, 0.3073228, 0.16891135, 0.026739368, 0.31136194, 0.11260383, -0.26918694, 0.0419928, -0.3365078, 0.20189743, -0.04136312, 0.039564647, 0.033199426, 0.18768296, -0.017119858), target2);
	target2 = MulAdd(b1, MF4x4(0.28663483, -0.41716507, 0.059281543, 0.043736435, 0.0028875466, 0.13817391, -0.12543318, -0.2794053, -0.023528943, 0.10610115, 0.09100278, 0.040132936, -0.21949205, -0.027810011, -0.0301218, 0.084047124), target2);
	target2 = MulAdd(c1, MF4x4(0.39674807, -0.0040878756, -0.038235947, 0.11880838, 0.009898328, 0.19107847, -0.009313831, -0.1554276, -0.047341663, 0.18049581, -0.029317195, 0.0708909, 0.0708316, -0.110617444, 0.14584038, -0.022261223), target2);
	target2 = MulAdd(d1, MF4x4(-0.20400241, 0.0896492, -0.010386381, -0.052133385, 0.005023956, -0.06628705, -0.16436209, -0.25345984, -0.05285192, 0.09706557, -0.03778914, -0.152546, 0.17023252, 0.063713826, 0.00743037, 0.056634087), target2);
	target2 = MulAdd(e1, MF4x4(-0.080793336, 0.4204207, 0.19098237, 0.20028038, -0.054076545, 0.22064368, -0.25853387, -0.3643562, 0.2085573, -0.023731, -0.06727709, -0.18683033, -0.18032159, -0.06388348, 0.304463, -0.2517781), target2);
	target2 = MulAdd(f1, MF4x4(0.11940941, 0.10624008, 0.16120581, 0.2369602, 0.3321827, 0.4272075, -0.10403669, -0.31388018, -0.006372124, -0.00653671, 0.109810196, 0.2277172, 0.005771998, 0.086026914, -0.08934813, -0.094941735), target2);
	target2 = MulAdd(g1, MF4x4(-0.13233568, 0.24112508, -0.0068006413, 0.12466225, 0.11396591, -0.07249253, -0.29090378, -0.12828146, -0.22001141, -0.08532405, -0.11932601, 0.29452974, 0.09572195, 0.017603843, 0.12454017, 0.16321751), target2);
	target2 = MulAdd(h1, MF4x4(0.042107448, -0.00807216, 0.06580674, -0.1289527, 0.13977426, -0.037159685, -0.21001346, -0.08698161, 0.22370502, -0.29170328, 0.2179206, 0.36621302, 0.0825477, -0.016513655, -0.11157249, 0.12861598), target2);
	target2 = MulAdd(i1, MF4x4(0.2246826, -0.13262233, 0.12131653, -0.15522355, 0.38104856, 0.030237729, 0.1286289, -0.19770473, -0.16175011, -0.13688888, 0.23505463, 0.21333031, 0.76352316, -0.17949077, -0.13124311, 0.1613879), target2);
	target2 = MulAdd(a2, MF4x4(-0.050607495, 0.0846705, -0.06136092, -0.033436477, 0.41138348, 0.037043408, -0.02676336, -0.37771952, 0.22147503, 0.06490757, -0.04266158, -0.22606373, 0.045775007, -0.054498192, -0.21495876, -0.036050417), target2);
	target2 = MulAdd(b2, MF4x4(-0.06242522, 0.2700824, -0.05602621, -0.12361551, 0.14477442, 0.19403581, 0.23505251, -0.072234035, -0.15831544, 0.4640447, -0.104754634, -0.004539681, -0.20246096, 0.23216484, -0.35886365, 0.11360777), target2);
	target2 = MulAdd(c2, MF4x4(0.14777757, 0.18951412, 0.027219458, 0.11216015, 0.02997997, -0.13466355, -0.0010830094, 0.021302953, 0.23441231, -0.14529245, 0.08068729, 0.10044398, 0.3972878, 0.26570204, 0.0046810666, -0.2863261), target2);
	target2 = MulAdd(d2, MF4x4(-0.10385485, 0.1053724, 0.16961229, 0.20727012, -0.025148917, -0.011365095, 0.03899919, -0.030950211, 0.079080455, -0.32767853, 0.064670205, -0.035771385, 0.16833797, -0.21567492, 0.30871257, -0.19965471), target2);
	target2 = MulAdd(e2, MF4x4(-0.23420888, -0.004894698, -0.18162623, -0.31107524, 0.11976508, 0.14924951, -0.08723316, 0.21401922, -0.58200324, -0.01177345, -0.049033508, 0.19593577, -0.21139073, 0.13016601, 0.08734843, 0.4158892), target2);
	target2 = MulAdd(f2, MF4x4(0.0009789813, 0.33274913, 0.017405733, -0.042906318, -0.26410276, -0.09291333, 0.019387102, 0.105381854, -0.009176527, 0.09483514, -0.28462934, -0.03644404, 0.285194, -0.4260311, 0.14902237, -0.115670316), target2);
	target2 = MulAdd(g2, MF4x4(-0.09344311, 0.4463103, 0.19984834, -0.09733857, -0.118717775, -0.0708026, 0.24919955, -0.11234634, 0.1246395, -0.052909933, 0.1525815, 0.07724016, 0.0070534665, -0.06404165, -0.18149726, -0.014058336), target2);
	target2 = MulAdd(h2, MF4x4(-0.17353044, 0.15376104, 0.004588994, -0.13554202, -0.19920237, -0.18918681, 0.11327512, -0.117296435, -0.0785251, 0.013677155, -0.2103214, 0.06843426, -0.27790928, 0.09837545, -0.00019213746, 0.09132539), target2);
	target2 = MulAdd(i2, MF4x4(-0.01586651, 0.014929441, 0.2426186, -0.1889374, -0.0865462, -0.07454513, -0.20797268, -0.22366855, 0.19704159, 0.0048206006, -0.16707218, -0.14162683, 0.036798395, -0.1663155, -0.12009389, 0.09603803), target2);
	target2 = MulAdd(na1, MF4x4(-0.041532192, 0.05753804, 0.17927068, -0.042112097, 0.12080969, -0.15052572, -0.34855765, -0.07356988, -0.28199884, -0.18958664, 0.15879883, 0.08511588, 0.0034213227, -0.05338495, -0.37285298, 0.06626709), target2);
	target2 = MulAdd(nb1, MF4x4(-0.20219134, 0.22150375, -0.29405454, 0.06597703, -0.018885285, -0.010551704, -0.010774283, 0.08758955, -0.2015349, -0.17006227, -0.24321876, -0.06864207, -0.118437864, -0.043977212, -0.029736811, 0.14040919), target2);
	target2 = MulAdd(nc1, MF4x4(-0.18709077, -0.09723938, 0.12783436, -0.15167634, 0.29039705, -0.11009911, 0.018371418, -0.060096707, -0.07256923, -0.25799567, -0.06276934, -0.035992302, -0.06729111, -0.059956793, -0.024079734, 0.011838878), target2);
	target2 = MulAdd(nd1, MF4x4(0.010449175, -0.08212451, 0.1409803, 0.11861122, -0.18035835, 0.051930565, 0.01049551, -0.09447962, 0.12029649, 0.040604513, -0.059971705, -0.0044667358, -0.22080486, -0.11187681, 0.124374695, -0.004155485), target2);
	target2 = MulAdd(ne1, MF4x4(-0.28584236, -0.38480133, -0.13987814, -0.4463469, -0.3890419, -0.022498172, 0.17334452, 0.21895568, -0.15450422, -0.10905497, 0.15111905, -0.22554915, 0.106121585, -0.029144369, 0.36059046, 0.22140682), target2);
	target2 = MulAdd(nf1, MF4x4(-0.23780307, -0.023033705, 0.068205886, -0.110635854, -0.26720005, -0.1608183, 0.19523881, 0.07972837, -0.018495852, -0.2793956, 0.17668398, -0.12020479, -0.079556085, -0.02284952, 0.031480275, 0.31818348), target2);
	target2 = MulAdd(ng1, MF4x4(0.22501226, -0.00829407, 0.059581667, 0.16512989, 0.18711442, 0.1200968, 0.11812652, -0.16091056, 0.15733972, 0.045156084, 0.20640492, -0.16852027, -0.11217177, 0.06746273, -0.050218176, 0.08643783), target2);
	target2 = MulAdd(nh1, MF4x4(0.20715691, -0.1082907, 0.027892975, 0.19515261, -0.17838904, 0.1532257, -0.108409844, -0.06632365, -0.13805026, 0.23020233, 0.12416581, -0.14861803, 0.16650471, 0.08158386, -0.09051303, -0.06981649), target2);
	target2 = MulAdd(ni1, MF4x4(-0.04617126, 0.06579221, 0.25964734, 0.28500968, 0.07641255, -0.090885855, -0.0972522, 0.18298368, -0.06393334, 0.103463, -0.23062052, -0.15270731, 0.13633437, 0.074707486, 0.15065335, -0.024602572), target2);
	target2 = MulAdd(na2, MF4x4(0.118319295, 0.010410938, 0.044655934, -0.104725905, 0.030477569, 0.12867387, 0.039075315, 0.18922117, 0.13301082, -0.1601557, 0.038168408, -0.07372259, -0.09522213, -0.095107146, -0.16679631, 0.044673234), target2);
	target2 = MulAdd(nb2, MF4x4(0.46229, -0.30780822, -0.09081465, 0.1433387, -0.0315039, 0.059409115, -0.24948491, -0.17146957, 0.060843736, -0.041989822, 0.054005735, 0.22835566, 0.12036598, -0.0070898845, 0.17276852, -0.17754094), target2);
	target2 = MulAdd(nc2, MF4x4(-0.35119572, 0.020034311, 0.08751943, 0.08193488, 0.041884877, 0.22649358, -0.07447533, 0.20845473, -0.04859846, -0.16206735, 0.06819576, -0.053000778, 0.18146423, 0.04694148, 0.045293212, 0.06783575), target2);
	target2 = MulAdd(nd2, MF4x4(0.280914, -0.14998704, -0.23485807, -0.015608296, 0.1549556, -0.11992663, -0.094974115, 0.05887284, 0.053392075, 0.10322464, -0.075066686, 0.068358354, -0.18663338, 0.009901499, -0.123370335, -0.12502703), target2);
	target2 = MulAdd(ne2, MF4x4(0.7748568, -0.17870626, -0.20770052, 0.024692526, -0.056430295, -0.06324113, -0.03660047, 0.29629672, -0.51896983, -0.027231261, 0.05903762, 0.077677645, -0.061675485, -0.20277846, 0.10352223, -0.08198446), target2);
	target2 = MulAdd(nf2, MF4x4(-0.06347568, 0.21643166, -0.09718546, 0.0372257, -0.029537952, -0.0357135, -0.09548363, 0.18225233, -0.29609334, -0.3496132, 0.18245913, -0.10162589, -0.18189451, -0.09077887, 0.117313184, -0.06863874), target2);
	target2 = MulAdd(ng2, MF4x4(-0.047373574, -0.020289376, -0.25748715, -0.13568166, 0.15656634, -0.06841899, 0.012100781, -0.13611819, 0.0016357322, -0.23870537, 0.14035743, -0.14700134, 0.2535575, -0.13697346, -0.13693139, -0.10365287), target2);
	target2 = MulAdd(nh2, MF4x4(0.4283934, -0.316192, -0.012617617, 0.018468965, 0.21436644, 0.18408814, -0.42651537, 0.12504087, -0.13894933, 0.091662176, -0.20096369, -0.080727175, -0.005487846, 0.17046383, 0.1383948, -0.0054956395), target2);
	target2 = MulAdd(ni2, MF4x4(0.20014295, -0.027282396, -0.06317007, 0.04452042, 0.064600386, 0.072222926, -0.33409226, 0.08063831, -0.022607977, 0.1308856, -0.39691743, -0.094889864, -0.1810531, 0.011367248, -0.2531222, -0.22468317), target2);

	MF3 result = tex5.SampleLevel(sam, pos, 0).rgb;
	result = MulAdd(e1, MF4x3(0.037410006, 0.048742272, 0.04348088, 0.037719514, 0.030768529, 0.03127472, 0.056426726, 0.03066893, 0.016440205, -0.010599352, 0.022832409, 0.023211194), result);
	result = MulAdd(e2, MF4x3(-0.005733291, 0.06365659, 0.06663611, -0.041917093, -0.016493445, -0.020438088, -0.0014357592, -0.0022506563, -0.0045095007, 0.029893145, -0.009129354, -0.015173116), result);
	result = MulAdd(ne1, MF4x3(0.013052085, 0.005108175, 0.0025906067, -0.021950055, -0.036447693, -0.036141638, -0.036296472, 0.0068928464, 0.013102313, 0.0060471976, -0.024798103, -0.023548538), result);
	result = MulAdd(ne2, MF4x3(0.0067743887, -0.06191211, -0.062355213, 0.0016080744, -0.020445071, -0.016840393, 0.028264903, 0.01852915, 0.015891539, -0.023877412, -0.013271666, -0.008158679), result);
	result = MulAdd(max(target1, 0), MF4x3(-0.04317466, -0.018953001, -0.020452993, -0.009322576, -0.03022352, -0.030970376, 0.05653658, 0.05430553, 0.046692245, 0.05615359, 0.059338935, 0.056018773), result);
	result = MulAdd(max(target2, 0), MF4x3(0.022878079, 0.03392234, 0.033057988, -0.017554542, -0.0141542535, -0.014122613, -0.048634093, -0.05316463, -0.047988772, -0.058002178, -0.040221967, -0.034025013), result);
	result = MulAdd(max(-target1, 0), MF4x3(-0.018253656, -0.04197674, -0.040467236, -0.04358929, -0.028309818, -0.025425073, -0.008488672, -0.001727991, 0.00035808363, -0.0011709273, 0.0052514165, 0.0059479307), result);
	result = MulAdd(max(-target2, 0), MF4x3(-0.08333935, -0.09818201, -0.09476284, -0.033692095, -0.046259012, -0.045797516, -0.007577072, 0.0022402718, 0.0016200038, 0.0029786075, -0.020728534, -0.018938033), result);
	result += MF3(0.047567394, -0.02504617, -0.028163986);

	result += INPUT.SampleLevel(sam, pos, 0).rgb;
	OUTPUT[gxy] = MF4(result, 1);
}
